#install.packages('TDAmapper')
library(TDAmapper)
library(cluster)
library(vip)
## 
## Attaching package: 'vip'
## The following object is masked from 'package:utils':
## 
##     vi
#install.packages('kernlab')
library(kernlab)
#install.packages('class')
library(class)
#install.packages('nnet')
library(nnet)
#install.packages('randomForest')
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
#install.packages('e1071')
library(e1071)                                                  
#install.packages("BayesFactor")
library(BayesFactor)
## Loading required package: coda
## 
## Attaching package: 'coda'
## The following object is masked from 'package:kernlab':
## 
##     nvar
## Loading required package: Matrix
## ************
## Welcome to BayesFactor 0.9.12-4.5. If you have questions, please contact Richard Morey (richarddmorey@gmail.com).
## 
## Type BFManual() to open the manual.
## ************
library(BayesPPD)
library(bayestestR)
#install.packages('igraph')
library('igraph')
## Warning: package 'igraph' was built under R version 4.3.3
## 
## Attaching package: 'igraph'
## The following object is masked from 'package:BayesFactor':
## 
##     compare
## The following object is masked from 'package:class':
## 
##     knn
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
#install.packages('locfit')
library(locfit)
## locfit 1.5-9.8    2023-06-11
#install.packages('ggplot2')
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:randomForest':
## 
##     margin
## The following object is masked from 'package:kernlab':
## 
##     alpha
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:igraph':
## 
##     as_data_frame, groups, union
## The following object is masked from 'package:randomForest':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#install.packages('networkD3')
library(networkD3)
library(rstanarm)
## Loading required package: Rcpp
## This is rstanarm version 2.26.1
## - See https://mc-stan.org/rstanarm/articles/priors for changes to default priors!
## - Default priors may change, so it's safest to specify priors, even if equivalent to the defaults.
## - For execution on a local, multicore CPU with excess RAM we recommend calling
##   options(mc.cores = parallel::detectCores())
library(see)
#install.packages('tidyverse')
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0
## ✔ readr     2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%--%()       masks igraph::%--%()
## ✖ ggplot2::alpha()        masks kernlab::alpha()
## ✖ tibble::as_data_frame() masks dplyr::as_data_frame(), igraph::as_data_frame()
## ✖ dplyr::combine()        masks randomForest::combine()
## ✖ purrr::compose()        masks igraph::compose()
## ✖ purrr::cross()          masks kernlab::cross()
## ✖ tidyr::crossing()       masks igraph::crossing()
## ✖ tidyr::expand()         masks Matrix::expand()
## ✖ dplyr::filter()         masks stats::filter()
## ✖ dplyr::lag()            masks stats::lag()
## ✖ ggplot2::margin()       masks randomForest::margin()
## ✖ purrr::none()           masks locfit::none()
## ✖ tidyr::pack()           masks Matrix::pack()
## ✖ purrr::simplify()       masks igraph::simplify()
## ✖ tidyr::unpack()         masks Matrix::unpack()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#install.packages('caret')
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:purrr':
## 
##     lift
## 
## The following objects are masked from 'package:rstanarm':
## 
##     compare_models, R2
#install.packages('ISLR')
library(ISLR)
#install.packages('MCMCpack')
library(MCMCpack)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## ##
## ## Markov Chain Monte Carlo Package (MCMCpack)
## ## Copyright (C) 2003-2025 Andrew D. Martin, Kevin M. Quinn, and Jong Hee Park
## ##
## ## Support provided by the U.S. National Science Foundation
## ## (Grants SES-0350646 and SES-0350613)
## ##
#install.packages("caret")
library(caret)
library(TDA)
## 
## Attaching package: 'TDA'
## 
## The following object is masked from 'package:cluster':
## 
##     silhouette
library(TDAstats)
library(ks)
## 
## Attaching package: 'ks'
## 
## The following object is masked from 'package:TDA':
## 
##     kde
## 
## The following object is masked from 'package:MCMCpack':
## 
##     vech
## 
## The following object is masked from 'package:igraph':
## 
##     compare
## 
## The following object is masked from 'package:BayesFactor':
## 
##     compare
#install.packages('MLmetrics')
library(MLmetrics)
## 
## Attaching package: 'MLmetrics'
## 
## The following objects are masked from 'package:caret':
## 
##     MAE, RMSE
## 
## The following object is masked from 'package:base':
## 
##     Recall
#install.packages('googledrive')
library(googledrive)
#install.packages('stringr')
library(stringr)
#install.packages('ks')
library(ks)
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
#import adult dataset from UCI repository stored on my desktop

#Adult **
# Read the Adult data file; it has no header row, so read.csv names the
# columns V1..V15.
adult <- read.csv("~/Desktop/NCU/DissertationDatasets/Adult/adult.data", header=FALSE)
# str() prints the structure itself and returns NULL, so it is called directly;
# wrapping it in head() only printed an extra stray NULL.
str(adult)
## 'data.frame':    32561 obs. of  15 variables:
##  $ V1 : int  39 50 38 53 28 37 49 52 31 42 ...
##  $ V2 : chr  " State-gov" " Self-emp-not-inc" " Private" " Private" ...
##  $ V3 : int  77516 83311 215646 234721 338409 284582 160187 209642 45781 159449 ...
##  $ V4 : chr  " Bachelors" " Bachelors" " HS-grad" " 11th" ...
##  $ V5 : int  13 13 9 7 13 14 5 9 14 13 ...
##  $ V6 : chr  " Never-married" " Married-civ-spouse" " Divorced" " Married-civ-spouse" ...
##  $ V7 : chr  " Adm-clerical" " Exec-managerial" " Handlers-cleaners" " Handlers-cleaners" ...
##  $ V8 : chr  " Not-in-family" " Husband" " Not-in-family" " Husband" ...
##  $ V9 : chr  " White" " White" " White" " Black" ...
##  $ V10: chr  " Male" " Male" " Male" " Male" ...
##  $ V11: int  2174 0 0 0 0 0 0 0 14084 5178 ...
##  $ V12: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ V13: int  40 13 40 40 40 40 16 45 50 40 ...
##  $ V14: chr  " United-States" " United-States" " United-States" " United-States" ...
##  $ V15: chr  " <=50K" " <=50K" " <=50K" " <=50K" ...
## NULL
  # Per-column summary of the raw data: quartiles and mean for the integer
  # columns, length/class/mode for the character columns.
  summary(adult)
##        V1             V2                  V3               V4           
##  Min.   :17.00   Length:32561       Min.   :  12285   Length:32561      
##  1st Qu.:28.00   Class :character   1st Qu.: 117827   Class :character  
##  Median :37.00   Mode  :character   Median : 178356   Mode  :character  
##  Mean   :38.58                      Mean   : 189778                     
##  3rd Qu.:48.00                      3rd Qu.: 237051                     
##  Max.   :90.00                      Max.   :1484705                     
##        V5             V6                 V7                 V8           
##  Min.   : 1.00   Length:32561       Length:32561       Length:32561      
##  1st Qu.: 9.00   Class :character   Class :character   Class :character  
##  Median :10.00   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :10.08                                                           
##  3rd Qu.:12.00                                                           
##  Max.   :16.00                                                           
##       V9                V10                 V11             V12        
##  Length:32561       Length:32561       Min.   :    0   Min.   :   0.0  
##  Class :character   Class :character   1st Qu.:    0   1st Qu.:   0.0  
##  Mode  :character   Mode  :character   Median :    0   Median :   0.0  
##                                        Mean   : 1078   Mean   :  87.3  
##                                        3rd Qu.:    0   3rd Qu.:   0.0  
##                                        Max.   :99999   Max.   :4356.0  
##       V13            V14                V15           
##  Min.   : 1.00   Length:32561       Length:32561      
##  1st Qu.:40.00   Class :character   Class :character  
##  Median :40.00   Mode  :character   Mode  :character  
##  Mean   :40.44                                        
##  3rd Qu.:45.00                                        
##  Max.   :99.00
  # Pairwise plot matrix of the six integer columns together with the
  # character column V15.
  ggpairs(data = adult[, c(1, 3, 5, 11, 12, 13, 15)])
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

  # Same columns again, this time coloured by the value of V15.
  ggpairs(
    data = adult,
    mapping = aes(color = V15),
    columns = c(1, 3, 5, 11, 12, 13, 15)
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

##Add Bayesian tests functions

#create function to conduct the Bayesian Sign Test
# Sign-test style summary of a vector of differences relative to a region of
# practical equivalence (ROPE).
#
# Arguments:
#   diffVector  numeric vector of differences.
#   rope_min    lower bound of the ROPE.
#   rope_max    upper bound of the ROPE (must not be smaller than rope_min).
#
# Returns a list with elements probLeft, probRope and probRight: the fraction
# of observations (the differences plus one pseudo-observation at 0) that lie
# below, inside and above the ROPE. The ROPE is closed, [rope_min, rope_max],
# so every observation falls in exactly one region and the three values sum
# to 1.
#
# Stops with an error when rope_max < rope_min.
BayesianSignTest <- function(diffVector, rope_min, rope_max) {
  if (rope_max < rope_min) {
    stop("rope_max should be larger than rope_min")
  }

  # Add the fake first observation in 0.
  # NOTE(review): the previous version also built a weight vector
  # (0.5 for the pseudo-observation, 1 for the rest) but never used it, so the
  # pseudo-observation counts like any other value here - confirm whether a
  # weighted proportion was intended.
  observations <- c(0, diffVector)

  probLeft <- mean(observations < rope_min)
  probRight <- mean(observations > rope_max)
  # Boundary values belong to the ROPE; with strict inequalities on both sides
  # they were counted nowhere and the probabilities did not sum to 1.
  probRope <- mean(observations >= rope_min & observations <= rope_max)

  list("probLeft" = probLeft, "probRope" = probRope, "probRight" = probRight)
}


##Create function to conduct Bayesian Signed Rank Test

# Monte Carlo Bayesian signed-rank test on a vector of differences relative to
# a region of practical equivalence (ROPE).
#
# Arguments:
#   diffVector  numeric vector of differences.
#   rope_min    lower bound of the ROPE.
#   rope_max    upper bound of the ROPE (must not be smaller than rope_min).
#   samples     number of Dirichlet weight vectors to draw (default 30000).
#
# For every drawn weight vector w, each pair (i, j) of observations contributes
# w[i] * w[j] to one region, chosen by the pair sum d[i] + d[j]:
#   right  if the sum is  > 2 * rope_max
#   rope   if the sum is  > 2 * rope_min (and not right)
#   left   otherwise
# The region with the largest total wins the draw; ties share the win equally.
#
# Returns a list with elements winLeft, winRope and winRight: the fraction of
# draws won by each region.
#
# Stops with an error when rope_max < rope_min.
BayesianSignedRank <- function(diffVector, rope_min, rope_max, samples = 30000) {
  if (rope_max < rope_min) {
    stop("rope_max should be larger than rope_min")
  }

  # Dirichlet parameters 0.5 1 1 ... 1: 0.5 for the fake first observation in
  # 0, 1 for every real difference.
  priorWeights <- c(0.5, rep(1, length(diffVector)))
  observations <- c(0, diffVector)

  # One weight vector per row. Called through the namespace so the function
  # neither attaches MCMCpack nor picks up a masking rdirichlet().
  sampledWeights <- MCMCpack::rdirichlet(samples, priorWeights)

  # The region of every pair depends only on the data, so classify once
  # instead of once per draw.
  pairSums <- outer(observations, observations, "+")
  isRight <- pairSums > 2 * rope_max
  isRope <- !isRight & pairSums > 2 * rope_min
  isLeft <- !(isRight | isRope)

  # Total weight of a region for every draw: the quadratic form w' M w,
  # evaluated for all rows of sampledWeights at once.
  regionMass <- function(mask) {
    rowSums((sampledWeights %*% (mask * 1)) * sampledWeights)
  }
  mass <- cbind(regionMass(isRight), regionMass(isRope), regionMass(isLeft))

  # mass has one row per draw, so the per-draw maximum recycles correctly down
  # each column in the comparison below.
  best <- pmax(mass[, 1], mass[, 2], mass[, 3])
  isWinner <- mass == best
  winShare <- isWinner / rowSums(isWinner)

  list(
    "winLeft" = mean(winShare[, 3]),
    "winRope" = mean(winShare[, 2]),
    "winRight" = mean(winShare[, 1])
  )
}


#Create function to conduct the Bayesian Correlated t.test

#diff_a_b is a vector of differences between the two classifiers, on each fold of cross-validation.
#If you have done 10 runs of 10-folds cross-validation, you have 100 results for each classifier.
#You should have run cross-validation on the same folds for the two classifiers.
#Then diff_a_b is the difference fold-by-fold.

#rho is the correlation of the cross-validation results: 1/(number of folds)
#rope_min and rope_max are the lower and the upper bound of the rope
 
# Bayesian correlated t-test on fold-by-fold differences.
#
# Arguments:
#   diff_a_b  numeric vector of differences between the two classifiers.
#   rho       correlation of the cross-validation results; must lie in [0, 1).
#   rope_min  lower bound of the ROPE.
#   rope_max  upper bound of the ROPE (must not be smaller than rope_min).
#
# Returns a list with elements left, rope and right: the probability mass of a
# Student t distribution (length(diff_a_b) - 1 degrees of freedom, centred on
# the mean difference, scale sd * sqrt(1/n + rho/(1 - rho))) below, inside and
# above the ROPE.
#
# Stops with an error when rope_max < rope_min or when rho is outside [0, 1).
correlatedBayesianTtest <- function(diff_a_b, rho, rope_min, rope_max) {
  if (rope_max < rope_min) {
    stop("rope_max should be larger than rope_min")
  }
  # rho >= 1 makes the scale infinite or NaN and rho < 0 can make the term
  # under the square root negative; both used to give silent nonsense.
  if (!isTRUE(rho >= 0 && rho < 1)) {
    stop("rho should be in the interval [0, 1)")
  }

  delta <- mean(diff_a_b)
  n <- length(diff_a_b)
  df <- n - 1
  sp <- sd(diff_a_b) * sqrt(1 / n + rho / (1 - rho))

  # All differences identical: the distribution collapses to a point at delta.
  # Dividing by sp = 0 would give NaN whenever delta sits exactly on a ROPE
  # bound, so assign the whole mass directly (bounds count as inside the ROPE).
  if (isTRUE(sp == 0)) {
    p.left <- as.numeric(delta < rope_min)
    p.right <- as.numeric(delta > rope_max)
    return(list('left' = p.left, 'rope' = 1 - p.left - p.right, 'right' = p.right))
  }

  upper <- (rope_max - delta) / sp
  p.left <- pt((rope_min - delta) / sp, df)
  p.rope <- pt(upper, df) - p.left
  # Upper tail taken directly instead of 1 - p.left - p.rope, which loses
  # precision and can dip slightly below zero.
  p.right <- pt(upper, df, lower.tail = FALSE)

  list('left' = p.left, 'rope' = p.rope, 'right' = p.right)
}
set.seed(16974)
###Prepare datasets for one-hot encoding (where necessary) and persistent homology of each dataset.
##One-hot encoding for adult dataset
library(caret)

#define one-hot encoding function; the " ~ ." formula covers every column of adult
dummy.adult <- dummyVars(" ~ .", data=adult)

#perform one-hot encoding on data frame
adult.one_hot_df <- data.frame(predict(dummy.adult, newdata=adult))


#structure of the encoded data frame; str() prints it and returns NULL, so it is
#called directly - wrapping it in head() only printed an extra stray NULL
str(adult.one_hot_df)
## 'data.frame':    32561 obs. of  110 variables:
##  $ V1                            : num  39 50 38 53 28 37 49 52 31 42 ...
##  $ V2..                          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Federal.gov                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Local.gov                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Never.worked               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Private                    : num  0 0 1 1 1 1 1 0 1 1 ...
##  $ V2.Self.emp.inc               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Self.emp.not.inc           : num  0 1 0 0 0 0 0 1 0 0 ...
##  $ V2.State.gov                  : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Without.pay                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V3                            : num  77516 83311 215646 234721 338409 ...
##  $ V4.10th                       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.11th                       : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ V4.12th                       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.1st.4th                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.5th.6th                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.7th.8th                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.9th                        : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ V4.Assoc.acdm                 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Assoc.voc                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Bachelors                  : num  1 1 0 0 1 0 0 0 0 1 ...
##  $ V4.Doctorate                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.HS.grad                    : num  0 0 1 0 0 0 0 1 0 0 ...
##  $ V4.Masters                    : num  0 0 0 0 0 1 0 0 1 0 ...
##  $ V4.Preschool                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Prof.school                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Some.college               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V5                            : num  13 13 9 7 13 14 5 9 14 13 ...
##  $ V6.Divorced                   : num  0 0 1 0 0 0 0 0 0 0 ...
##  $ V6.Married.AF.spouse          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V6.Married.civ.spouse         : num  0 1 0 1 1 1 0 1 0 1 ...
##  $ V6.Married.spouse.absent      : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ V6.Never.married              : num  1 0 0 0 0 0 0 0 1 0 ...
##  $ V6.Separated                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V6.Widowed                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7..                          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Adm.clerical               : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Armed.Forces               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Craft.repair               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Exec.managerial            : num  0 1 0 0 0 1 0 1 0 1 ...
##  $ V7.Farming.fishing            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Handlers.cleaners          : num  0 0 1 1 0 0 0 0 0 0 ...
##  $ V7.Machine.op.inspct          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Other.service              : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ V7.Priv.house.serv            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Prof.specialty             : num  0 0 0 0 1 0 0 0 1 0 ...
##  $ V7.Protective.serv            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Sales                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Tech.support               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Transport.moving           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V8.Husband                    : num  0 1 0 1 0 0 0 1 0 1 ...
##  $ V8.Not.in.family              : num  1 0 1 0 0 0 1 0 1 0 ...
##  $ V8.Other.relative             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V8.Own.child                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V8.Unmarried                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V8.Wife                       : num  0 0 0 0 1 1 0 0 0 0 ...
##  $ V9.Amer.Indian.Eskimo         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V9.Asian.Pac.Islander         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V9.Black                      : num  0 0 0 1 1 0 1 0 0 0 ...
##  $ V9.Other                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V9.White                      : num  1 1 1 0 0 1 0 1 1 1 ...
##  $ V10.Female                    : num  0 0 0 0 1 1 1 0 1 0 ...
##  $ V10.Male                      : num  1 1 1 1 0 0 0 1 0 1 ...
##  $ V11                           : num  2174 0 0 0 0 ...
##  $ V12                           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V13                           : num  40 13 40 40 40 40 16 45 50 40 ...
##  $ V14..                         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Cambodia                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Canada                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.China                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Columbia                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Cuba                      : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ V14.Dominican.Republic        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Ecuador                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.El.Salvador               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.England                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.France                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Germany                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Greece                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Guatemala                 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Haiti                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Holand.Netherlands        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Honduras                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Hong                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Hungary                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.India                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Iran                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Ireland                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Italy                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Jamaica                   : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ V14.Japan                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Laos                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Mexico                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Nicaragua                 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Outlying.US.Guam.USVI.etc.: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Peru                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Philippines               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Poland                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Portugal                  : num  0 0 0 0 0 0 0 0 0 0 ...
##   [list output truncated]
## NULL
# Keep the raw V15 column and attach it to the encoded data as column adult_df1.
adult_df1 <- adult[, 15]
adult.one_hot_df1 <- cbind(adult.one_hot_df, adult_df1)
# Numeric source columns, selected by name. These are the columns that sit at
# positions 1, 11, 28, 64, 65 and 66 of the encoded frame; positions shift
# whenever a categorical column gains or loses a level, names do not.
adult.one_hot_df2 <- adult.one_hot_df1[, c("V1", "V3", "V5", "V11", "V12", "V13")]

##Persistent homology of adult dataset

#Create a random sample of 1000 rows from the adult.one_hot dataset to check whether a barcode and persistence diagram can be resolved at this dataset size.

# Draw 1000 distinct rows at random (without replacement) from the encoded data.
adult.one_hot_1000_df <- adult.one_hot_df[sample(nrow(adult.one_hot_df), size = 1000, replace = FALSE), ]
# str() prints the structure and returns NULL; calling it directly avoids the
# stray NULL that head(str(...)) printed.
str(adult.one_hot_1000_df)
## 'data.frame':    1000 obs. of  110 variables:
##  $ V1                            : num  33 25 39 21 32 26 20 58 24 63 ...
##  $ V2..                          : num  0 0 0 0 0 0 1 1 0 0 ...
##  $ V2.Federal.gov                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Local.gov                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Never.worked               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Private                    : num  1 1 1 1 1 1 0 0 1 0 ...
##  $ V2.Self.emp.inc               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Self.emp.not.inc           : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ V2.State.gov                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Without.pay                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V3                            : num  176992 105693 234901 198050 134886 ...
##  $ V4.10th                       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.11th                       : num  0 0 0 0 0 0 0 1 0 0 ...
##  $ V4.12th                       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.1st.4th                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.5th.6th                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.7th.8th                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.9th                        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Assoc.acdm                 : num  0 0 1 1 0 0 0 0 0 0 ...
##  $ V4.Assoc.voc                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Bachelors                  : num  0 1 0 0 0 1 0 0 0 0 ...
##  $ V4.Doctorate                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.HS.grad                    : num  0 0 0 0 1 0 0 0 1 0 ...
##  $ V4.Masters                    : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Preschool                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Prof.school                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Some.college               : num  0 0 0 0 0 0 1 0 0 1 ...
##  $ V5                            : num  14 13 12 12 9 13 10 7 9 10 ...
##  $ V6.Divorced                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V6.Married.AF.spouse          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V6.Married.civ.spouse         : num  1 0 0 0 1 0 0 1 0 1 ...
##  $ V6.Married.spouse.absent      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V6.Never.married              : num  0 1 0 1 0 1 1 0 1 0 ...
##  $ V6.Separated                  : num  0 0 1 0 0 0 0 0 0 0 ...
##  $ V6.Widowed                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7..                          : num  0 0 0 0 0 0 1 1 0 0 ...
##  $ V7.Adm.clerical               : num  0 0 1 1 1 0 0 0 0 0 ...
##  $ V7.Armed.Forces               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Craft.repair               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Exec.managerial            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Farming.fishing            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Handlers.cleaners          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Machine.op.inspct          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Other.service              : num  0 0 0 0 0 0 0 0 1 1 ...
##  $ V7.Priv.house.serv            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Prof.specialty             : num  1 1 0 0 0 1 0 0 0 0 ...
##  $ V7.Protective.serv            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Sales                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Tech.support               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Transport.moving           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V8.Husband                    : num  1 0 0 0 0 0 0 1 0 1 ...
##  $ V8.Not.in.family              : num  0 1 0 1 0 1 0 0 0 0 ...
##  $ V8.Other.relative             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V8.Own.child                  : num  0 0 0 0 0 0 0 0 1 0 ...
##  $ V8.Unmarried                  : num  0 0 1 0 0 0 1 0 0 0 ...
##  $ V8.Wife                       : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ V9.Amer.Indian.Eskimo         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V9.Asian.Pac.Islander         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V9.Black                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V9.Other                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V9.White                      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V10.Female                    : num  0 1 0 1 1 1 1 0 1 0 ...
##  $ V10.Male                      : num  1 0 1 0 0 0 0 1 0 1 ...
##  $ V11                           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V12                           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V13                           : num  40 40 40 25 40 40 20 16 25 48 ...
##  $ V14..                         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Cambodia                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Canada                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.China                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Columbia                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Cuba                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Dominican.Republic        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Ecuador                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.El.Salvador               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.England                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.France                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Germany                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Greece                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Guatemala                 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Haiti                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Holand.Netherlands        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Honduras                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Hong                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Hungary                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.India                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Iran                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Ireland                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Italy                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Jamaica                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Japan                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Laos                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Mexico                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Nicaragua                 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Outlying.US.Guam.USVI.etc.: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Peru                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Philippines               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Poland                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Portugal                  : num  0 0 0 0 0 0 0 0 0 0 ...
##   [list output truncated]
## NULL
# Per-column summary of the 1000-row sample; for the 0/1 indicator columns the
# mean is the share of sampled rows in that category.
summary(adult.one_hot_1000_df)
##        V1             V2..       V2.Federal.gov   V2.Local.gov  
##  Min.   :17.00   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:28.00   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000  
##  Median :37.00   Median :0.000   Median :0.000   Median :0.000  
##  Mean   :38.64   Mean   :0.077   Mean   :0.025   Mean   :0.064  
##  3rd Qu.:47.00   3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :90.00   Max.   :1.000   Max.   :1.000   Max.   :1.000  
##  V2.Never.worked   V2.Private    V2.Self.emp.inc V2.Self.emp.not.inc
##  Min.   :0       Min.   :0.000   Min.   :0.000   Min.   :0.000      
##  1st Qu.:0       1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000      
##  Median :0       Median :1.000   Median :0.000   Median :0.000      
##  Mean   :0       Mean   :0.679   Mean   :0.037   Mean   :0.079      
##  3rd Qu.:0       3rd Qu.:1.000   3rd Qu.:0.000   3rd Qu.:0.000      
##  Max.   :0       Max.   :1.000   Max.   :1.000   Max.   :1.000      
##   V2.State.gov   V2.Without.pay       V3            V4.10th     
##  Min.   :0.000   Min.   :0      Min.   : 19302   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0      1st Qu.:123797   1st Qu.:0.000  
##  Median :0.000   Median :0      Median :181982   Median :0.000  
##  Mean   :0.039   Mean   :0      Mean   :195583   Mean   :0.041  
##  3rd Qu.:0.000   3rd Qu.:0      3rd Qu.:242529   3rd Qu.:0.000  
##  Max.   :1.000   Max.   :0      Max.   :721161   Max.   :1.000  
##     V4.11th         V4.12th        V4.1st.4th      V4.5th.6th   
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000  
##  Median :0.000   Median :0.000   Median :0.000   Median :0.000  
##  Mean   :0.032   Mean   :0.015   Mean   :0.005   Mean   :0.015  
##  3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :1.000   Max.   :1.000   Max.   :1.000   Max.   :1.000  
##    V4.7th.8th        V4.9th      V4.Assoc.acdm   V4.Assoc.voc    V4.Bachelors  
##  Min.   :0.000   Min.   :0.000   Min.   :0.00   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.00   1st Qu.:0.000   1st Qu.:0.000  
##  Median :0.000   Median :0.000   Median :0.00   Median :0.000   Median :0.000  
##  Mean   :0.015   Mean   :0.018   Mean   :0.04   Mean   :0.052   Mean   :0.155  
##  3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.00   3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :1.000   Max.   :1.000   Max.   :1.00   Max.   :1.000   Max.   :1.000  
##   V4.Doctorate     V4.HS.grad      V4.Masters     V4.Preschool  
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000  
##  Median :0.000   Median :0.000   Median :0.000   Median :0.000  
##  Mean   :0.014   Mean   :0.327   Mean   :0.053   Mean   :0.002  
##  3rd Qu.:0.000   3rd Qu.:1.000   3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :1.000   Max.   :1.000   Max.   :1.000   Max.   :1.000  
##  V4.Prof.school  V4.Some.college       V5      V6.Divorced   
##  Min.   :0.000   Min.   :0.000   Min.   : 1   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.: 9   1st Qu.:0.000  
##  Median :0.000   Median :0.000   Median :10   Median :0.000  
##  Mean   :0.014   Mean   :0.202   Mean   :10   Mean   :0.132  
##  3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:12   3rd Qu.:0.000  
##  Max.   :1.000   Max.   :1.000   Max.   :16   Max.   :1.000  
##  V6.Married.AF.spouse V6.Married.civ.spouse V6.Married.spouse.absent
##  Min.   :0            Min.   :0.000         Min.   :0.000           
##  1st Qu.:0            1st Qu.:0.000         1st Qu.:0.000           
##  Median :0            Median :0.000         Median :0.000           
##  Mean   :0            Mean   :0.464         Mean   :0.005           
##  3rd Qu.:0            3rd Qu.:1.000         3rd Qu.:0.000           
##  Max.   :0            Max.   :1.000         Max.   :1.000           
##  V6.Never.married  V6.Separated     V6.Widowed         V7..      
##  Min.   :0.000    Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000    1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000  
##  Median :0.000    Median :0.000   Median :0.000   Median :0.000  
##  Mean   :0.325    Mean   :0.041   Mean   :0.033   Mean   :0.077  
##  3rd Qu.:1.000    3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :1.000    Max.   :1.000   Max.   :1.000   Max.   :1.000  
##  V7.Adm.clerical V7.Armed.Forces V7.Craft.repair V7.Exec.managerial
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000     
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000     
##  Median :0.000   Median :0.000   Median :0.000   Median :0.000     
##  Mean   :0.117   Mean   :0.001   Mean   :0.129   Mean   :0.124     
##  3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000     
##  Max.   :1.000   Max.   :1.000   Max.   :1.000   Max.   :1.000     
##  V7.Farming.fishing V7.Handlers.cleaners V7.Machine.op.inspct V7.Other.service
##  Min.   :0.000      Min.   :0.000        Min.   :0.000        Min.   :0.000   
##  1st Qu.:0.000      1st Qu.:0.000        1st Qu.:0.000        1st Qu.:0.000   
##  Median :0.000      Median :0.000        Median :0.000        Median :0.000   
##  Mean   :0.029      Mean   :0.041        Mean   :0.071        Mean   :0.091   
##  3rd Qu.:0.000      3rd Qu.:0.000        3rd Qu.:0.000        3rd Qu.:0.000   
##  Max.   :1.000      Max.   :1.000        Max.   :1.000        Max.   :1.000   
##  V7.Priv.house.serv V7.Prof.specialty V7.Protective.serv    V7.Sales    
##  Min.   :0.000      Min.   :0.000     Min.   :0.000      Min.   :0.000  
##  1st Qu.:0.000      1st Qu.:0.000     1st Qu.:0.000      1st Qu.:0.000  
##  Median :0.000      Median :0.000     Median :0.000      Median :0.000  
##  Mean   :0.003      Mean   :0.119     Mean   :0.018      Mean   :0.102  
##  3rd Qu.:0.000      3rd Qu.:0.000     3rd Qu.:0.000      3rd Qu.:0.000  
##  Max.   :1.000      Max.   :1.000     Max.   :1.000      Max.   :1.000  
##  V7.Tech.support V7.Transport.moving   V8.Husband   V8.Not.in.family
##  Min.   :0.000   Min.   :0.000       Min.   :0.00   Min.   :0.000   
##  1st Qu.:0.000   1st Qu.:0.000       1st Qu.:0.00   1st Qu.:0.000   
##  Median :0.000   Median :0.000       Median :0.00   Median :0.000   
##  Mean   :0.035   Mean   :0.043       Mean   :0.41   Mean   :0.261   
##  3rd Qu.:0.000   3rd Qu.:0.000       3rd Qu.:1.00   3rd Qu.:1.000   
##  Max.   :1.000   Max.   :1.000       Max.   :1.00   Max.   :1.000   
##  V8.Other.relative  V8.Own.child    V8.Unmarried      V8.Wife     
##  Min.   :0.000     Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000     1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000  
##  Median :0.000     Median :0.000   Median :0.000   Median :0.000  
##  Mean   :0.027     Mean   :0.136   Mean   :0.115   Mean   :0.051  
##  3rd Qu.:0.000     3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :1.000     Max.   :1.000   Max.   :1.000   Max.   :1.000  
##  V9.Amer.Indian.Eskimo V9.Asian.Pac.Islander    V9.Black        V9.Other    
##  Min.   :0.000         Min.   :0.000         Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000         1st Qu.:0.000         1st Qu.:0.000   1st Qu.:0.000  
##  Median :0.000         Median :0.000         Median :0.000   Median :0.000  
##  Mean   :0.014         Mean   :0.029         Mean   :0.104   Mean   :0.007  
##  3rd Qu.:0.000         3rd Qu.:0.000         3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :1.000         Max.   :1.000         Max.   :1.000   Max.   :1.000  
##     V9.White       V10.Female       V10.Male          V11         
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :    0.0  
##  1st Qu.:1.000   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:    0.0  
##  Median :1.000   Median :0.000   Median :1.000   Median :    0.0  
##  Mean   :0.846   Mean   :0.339   Mean   :0.661   Mean   :  868.9  
##  3rd Qu.:1.000   3rd Qu.:1.000   3rd Qu.:1.000   3rd Qu.:    0.0  
##  Max.   :1.000   Max.   :1.000   Max.   :1.000   Max.   :99999.0  
##       V12               V13           V14..        V14.Cambodia   V14.Canada   
##  Min.   :   0.00   Min.   : 1.0   Min.   :0.000   Min.   :0     Min.   :0.000  
##  1st Qu.:   0.00   1st Qu.:40.0   1st Qu.:0.000   1st Qu.:0     1st Qu.:0.000  
##  Median :   0.00   Median :40.0   Median :0.000   Median :0     Median :0.000  
##  Mean   :  92.56   Mean   :40.5   Mean   :0.024   Mean   :0     Mean   :0.003  
##  3rd Qu.:   0.00   3rd Qu.:45.0   3rd Qu.:0.000   3rd Qu.:0     3rd Qu.:0.000  
##  Max.   :2457.00   Max.   :99.0   Max.   :1.000   Max.   :0     Max.   :1.000  
##    V14.China      V14.Columbia      V14.Cuba     V14.Dominican.Republic
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000         
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000         
##  Median :0.000   Median :0.000   Median :0.000   Median :0.000         
##  Mean   :0.003   Mean   :0.002   Mean   :0.005   Mean   :0.002         
##  3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000         
##  Max.   :1.000   Max.   :1.000   Max.   :1.000   Max.   :1.000         
##   V14.Ecuador    V14.El.Salvador  V14.England      V14.France   
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000  
##  Median :0.000   Median :0.000   Median :0.000   Median :0.000  
##  Mean   :0.001   Mean   :0.003   Mean   :0.003   Mean   :0.001  
##  3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :1.000   Max.   :1.000   Max.   :1.000   Max.   :1.000  
##   V14.Germany      V14.Greece    V14.Guatemala   V14.Haiti    
##  Min.   :0.000   Min.   :0.000   Min.   :0     Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0     1st Qu.:0.000  
##  Median :0.000   Median :0.000   Median :0     Median :0.000  
##  Mean   :0.002   Mean   :0.002   Mean   :0     Mean   :0.002  
##  3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0     3rd Qu.:0.000  
##  Max.   :1.000   Max.   :1.000   Max.   :0     Max.   :1.000  
##  V14.Holand.Netherlands  V14.Honduras    V14.Hong      V14.Hungary   
##  Min.   :0              Min.   :0     Min.   :0.000   Min.   :0.000  
##  1st Qu.:0              1st Qu.:0     1st Qu.:0.000   1st Qu.:0.000  
##  Median :0              Median :0     Median :0.000   Median :0.000  
##  Mean   :0              Mean   :0     Mean   :0.001   Mean   :0.002  
##  3rd Qu.:0              3rd Qu.:0     3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :0              Max.   :0     Max.   :1.000   Max.   :1.000  
##    V14.India        V14.Iran  V14.Ireland   V14.Italy      V14.Jamaica   
##  Min.   :0.000   Min.   :0   Min.   :0    Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0   1st Qu.:0    1st Qu.:0.000   1st Qu.:0.000  
##  Median :0.000   Median :0   Median :0    Median :0.000   Median :0.000  
##  Mean   :0.004   Mean   :0   Mean   :0    Mean   :0.003   Mean   :0.003  
##  3rd Qu.:0.000   3rd Qu.:0   3rd Qu.:0    3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :1.000   Max.   :0   Max.   :0    Max.   :1.000   Max.   :1.000  
##    V14.Japan        V14.Laos   V14.Mexico    V14.Nicaragua
##  Min.   :0.000   Min.   :0   Min.   :0.000   Min.   :0    
##  1st Qu.:0.000   1st Qu.:0   1st Qu.:0.000   1st Qu.:0    
##  Median :0.000   Median :0   Median :0.000   Median :0    
##  Mean   :0.003   Mean   :0   Mean   :0.022   Mean   :0    
##  3rd Qu.:0.000   3rd Qu.:0   3rd Qu.:0.000   3rd Qu.:0    
##  Max.   :1.000   Max.   :0   Max.   :1.000   Max.   :0    
##  V14.Outlying.US.Guam.USVI.etc.    V14.Peru V14.Philippines   V14.Poland   
##  Min.   :0                      Min.   :0   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0                      1st Qu.:0   1st Qu.:0.000   1st Qu.:0.000  
##  Median :0                      Median :0   Median :0.000   Median :0.000  
##  Mean   :0                      Mean   :0   Mean   :0.004   Mean   :0.002  
##  3rd Qu.:0                      3rd Qu.:0   3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :0                      Max.   :0   Max.   :1.000   Max.   :1.000  
##   V14.Portugal   V14.Puerto.Rico  V14.Scotland   V14.South       V14.Taiwan   
##  Min.   :0.000   Min.   :0.000   Min.   :0     Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0     1st Qu.:0.000   1st Qu.:0.000  
##  Median :0.000   Median :0.000   Median :0     Median :0.000   Median :0.000  
##  Mean   :0.001   Mean   :0.004   Mean   :0     Mean   :0.001   Mean   :0.001  
##  3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0     3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :1.000   Max.   :1.000   Max.   :0     Max.   :1.000   Max.   :1.000  
##   V14.Thailand V14.Trinadad.Tobago V14.United.States  V14.Vietnam   
##  Min.   :0     Min.   :0.000       Min.   :0.000     Min.   :0.000  
##  1st Qu.:0     1st Qu.:0.000       1st Qu.:1.000     1st Qu.:0.000  
##  Median :0     Median :0.000       Median :1.000     Median :0.000  
##  Mean   :0     Mean   :0.002       Mean   :0.891     Mean   :0.003  
##  3rd Qu.:0     3rd Qu.:0.000       3rd Qu.:1.000     3rd Qu.:0.000  
##  Max.   :0     Max.   :1.000       Max.   :1.000     Max.   :1.000  
##  V14.Yugoslavia   V15...50K        V15..50K    
##  Min.   :0      Min.   :0.000   Min.   :0.000  
##  1st Qu.:0      1st Qu.:1.000   1st Qu.:0.000  
##  Median :0      Median :1.000   Median :0.000  
##  Mean   :0      Mean   :0.769   Mean   :0.231  
##  3rd Qu.:0      3rd Qu.:1.000   3rd Qu.:0.000  
##  Max.   :0      Max.   :1.000   Max.   :1.000
# Calculate persistent homology for adult.one_hot_1000_df
# (presumably a 1,000-row sample of the one-hot data; confirm where it is built).
phom.adult.one_hot_1000_df <- calculate_homology(adult.one_hot_1000_df)

# Plot the barcode of the homology features computed above.
plot_barcode(phom.adult.one_hot_1000_df)

# Plot the persistence diagram of the same features.
plot_persist(phom.adult.one_hot_1000_df)

#####———————————————MAPPER ALGORITHM————————————————

# Prepare the Adult data for the 1-D Mapper algorithm.
# adult_df1 is column 15 of the raw data; it is appended to the one-hot frame
# and then several column subsets are taken by position.
adult_df1 <- adult[, 15]
adult.one_hot_df1 <- cbind(adult.one_hot_df, adult_df1)
adult.one_hot_df2 <- adult.one_hot_df1[, c(1, 11, 28, 64, 65, 66, 109)]
adult.one_hot_df3 <- adult.one_hot_df1[, c(1, 11, 28, 62:66)]
# Drop columns 109 and 110 (presumably the one-hot copies of the label; verify).
adult.one_hot_df4 <- adult.one_hot_df1[, -(109:110)]

## Two filter functions: PCA and KDE

# Linear PCA filter: centre and scale every column of the full one-hot data
# frame, then project the same rows onto the principal components.
# `scale.` is written out in full; the shorter `scale` only works through
# partial argument matching.
b <- prcomp(adult.one_hot_df, center = TRUE, scale. = TRUE)
ts_pca_b <- as.data.frame(predict(b, adult.one_hot_df))

# Kernel density estimate filter: density evaluated at every row of the first
# four columns of adult.one_hot_df3, using an identity bandwidth matrix.
filter.kde <- kde(
  adult.one_hot_df3[, 1:4],
  H = diag(1, nrow = 4),
  eval.points = adult.one_hot_df3[, 1:4]
)$estimate


## *** Adult PCA Mapper: 5 intervals, 60% overlap, 5 bins

# Build the 1-D Mapper graph with the first principal component as the filter
# and pairwise distances between the one-hot rows for clustering.
m_adult_5.60.5 <- mapper1D(
  distance_matrix = dist(adult.one_hot_df),
  filter_values = c(ts_pca_b$PC1),
  num_intervals = 5,
  percent_overlap = 60,
  num_bins_when_clustering = 5
)


# Turn the Mapper adjacency matrix into an undirected igraph object and draw it.
# graph_from_adjacency_matrix() and layout_nicely() are the replacements for
# graph.adjacency() and layout.auto(), which igraph 2.0.0 deprecated.
g_adult_5.60.5 <- graph_from_adjacency_matrix(m_adult_5.60.5$adjacency,
                                              mode = "undirected")
plot(g_adult_5.60.5, layout = layout_nicely(g_adult_5.60.5))

# Inspect the structure of the PCA Mapper result.
# str() prints its report itself and returns NULL, so it is called directly;
# wrapping it in head() only printed an extra "NULL".
str(m_adult_5.60.5$level_of_vertex)
##  int [1:5] 1 2 3 4 5
str(m_adult_5.60.5$vertices_in_level)
## List of 5
##  $ : num 1
##  $ : num 2
##  $ : num 3
##  $ : num 4
##  $ : num 5
str(m_adult_5.60.5$points_in_vertex)
## List of 5
##  $ : int [1:6560] 2 8 10 11 12 15 21 26 28 39 ...
##  $ : int [1:13933] 2 8 10 11 12 15 19 20 21 23 ...
##  $ : int [1:15744] 1 2 3 4 5 6 9 11 15 16 ...
##  $ : int [1:19829] 1 3 4 5 6 9 13 14 16 17 ...
##  $ : int [1:16508] 1 3 5 7 13 14 17 18 22 25 ...
# Colour each Mapper vertex by its level (scaled to (0, 1]) and size it by the
# log of the number of points it contains.
my_resolution <- 100
my_palette <- colorRampPalette(c("red", "green", "lightblue"))
my_max <- max(m_adult_5.60.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_adult_5.60.5$level_of_vertex / my_max

# Bin the scaled levels into my_resolution intervals and pick one colour per bin.
my_colors <- my_palette(my_resolution)[
  as.numeric(cut(my_vector, breaks = my_resolution))
]

# graph_from_adjacency_matrix() / layout_nicely() replace the deprecated
# graph.adjacency() / layout.auto().
g_adult_5.60.5 <- graph_from_adjacency_matrix(m_adult_5.60.5$adjacency,
                                              mode = "undirected")
# Number of points in each vertex.
vertex_size <- lengths(m_adult_5.60.5$points_in_vertex)

plot(g_adult_5.60.5,
     layout = layout_nicely(g_adult_5.60.5),
     vertex.size = 30 * log(vertex_size) / max(log(vertex_size)),
     vertex.color = my_colors)

# For each of the five PCA Mapper vertices keep the one-element list holding
# its member row indices (.nK) ...
m_adult_5.60.5.n1 <- m_adult_5.60.5$points_in_vertex[1]
m_adult_5.60.5.n2 <- m_adult_5.60.5$points_in_vertex[2]
m_adult_5.60.5.n3 <- m_adult_5.60.5$points_in_vertex[3]
m_adult_5.60.5.n4 <- m_adult_5.60.5$points_in_vertex[4]
m_adult_5.60.5.n5 <- m_adult_5.60.5$points_in_vertex[5]
# ... and the same indices flattened to a plain vector (.nK.vec).
m_adult_5.60.5.n1.vec <- as.vector(unlist(m_adult_5.60.5.n1))
m_adult_5.60.5.n2.vec <- as.vector(unlist(m_adult_5.60.5.n2))
m_adult_5.60.5.n3.vec <- as.vector(unlist(m_adult_5.60.5.n3))
m_adult_5.60.5.n4.vec <- as.vector(unlist(m_adult_5.60.5.n4))
m_adult_5.60.5.n5.vec <- as.vector(unlist(m_adult_5.60.5.n5))

## Use the row IDs of each PCA Mapper vertex to pull the matching rows out of
## adult.one_hot_df4.
tda.m_adult_5.60.5.n1.vec <- adult.one_hot_df4[m_adult_5.60.5.n1.vec, ]
tda.m_adult_5.60.5.n2.vec <- adult.one_hot_df4[m_adult_5.60.5.n2.vec, ]
tda.m_adult_5.60.5.n3.vec <- adult.one_hot_df4[m_adult_5.60.5.n3.vec, ]
tda.m_adult_5.60.5.n4.vec <- adult.one_hot_df4[m_adult_5.60.5.n4.vec, ]
tda.m_adult_5.60.5.n5.vec <- adult.one_hot_df4[m_adult_5.60.5.n5.vec, ]



## *** Adult Mapper, KDE filter: 5 intervals, 60% overlap, 5 bins

# Same Mapper settings as the PCA run, but with the kernel density estimate
# as the filter values.
m_kde_adult_5.60.5 <- mapper1D(
  distance_matrix = dist(adult.one_hot_df),
  filter_values = c(filter.kde),
  num_intervals = 5,
  percent_overlap = 60,
  num_bins_when_clustering = 5
)


# Build and draw the undirected Mapper graph. graph_from_adjacency_matrix() and
# layout_nicely() replace graph.adjacency() and layout.auto(), which igraph
# 2.0.0 deprecated.
g_kde_adult_5.60.5 <- graph_from_adjacency_matrix(m_kde_adult_5.60.5$adjacency,
                                                  mode = "undirected")
plot(g_kde_adult_5.60.5, layout = layout_nicely(g_kde_adult_5.60.5))

# Inspect the structure of the KDE Mapper result.
# str() prints its report itself and returns NULL, so it is called directly;
# wrapping it in head() only printed an extra "NULL".
str(m_kde_adult_5.60.5$level_of_vertex)
##  int [1:5] 1 2 3 4 5
str(m_kde_adult_5.60.5$vertices_in_level)
## List of 5
##  $ : num 1
##  $ : num 2
##  $ : num 3
##  $ : num 4
##  $ : num 5
str(m_kde_adult_5.60.5$points_in_vertex)
## List of 5
##  $ : int [1:15260] 2 4 5 6 7 9 13 16 19 20 ...
##  $ : int [1:14482] 1 2 4 6 8 9 12 13 20 22 ...
##  $ : int [1:13266] 1 2 8 10 11 12 13 14 27 28 ...
##  $ : int [1:11795] 3 8 10 11 12 14 15 17 27 28 ...
##  $ : int [1:8940] 3 15 17 18 27 32 37 39 49 55 ...
# Colour each KDE Mapper vertex by its level (scaled to (0, 1]) and size it by
# the log of the number of points it contains.
my_resolution <- 100
my_palette <- colorRampPalette(c("red", "green", "lightblue"))
my_max <- max(m_kde_adult_5.60.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_kde_adult_5.60.5$level_of_vertex / my_max

# Bin the scaled levels into my_resolution intervals and pick one colour per bin.
my_colors <- my_palette(my_resolution)[
  as.numeric(cut(my_vector, breaks = my_resolution))
]

# The graph is stored under g_kde_adult_5.60.5, the name the plot call below
# uses (it was previously assigned to the mistyped g_kde_adult_5.50.5).
# graph_from_adjacency_matrix() / layout_nicely() replace the deprecated
# graph.adjacency() / layout.auto().
g_kde_adult_5.60.5 <- graph_from_adjacency_matrix(m_kde_adult_5.60.5$adjacency,
                                                  mode = "undirected")
# Number of points in each vertex.
vertex_size <- lengths(m_kde_adult_5.60.5$points_in_vertex)

plot(g_kde_adult_5.60.5,
     layout = layout_nicely(g_kde_adult_5.60.5),
     vertex.size = 30 * log(vertex_size) / max(log(vertex_size)),
     vertex.color = my_colors)

## Extract the observation IDs held by each KDE Mapper vertex:
## .nK is the one-element list for vertex K ...
m_kde_adult_5.60.5.n1 <- m_kde_adult_5.60.5$points_in_vertex[1]
m_kde_adult_5.60.5.n2 <- m_kde_adult_5.60.5$points_in_vertex[2]
m_kde_adult_5.60.5.n3 <- m_kde_adult_5.60.5$points_in_vertex[3]
m_kde_adult_5.60.5.n4 <- m_kde_adult_5.60.5$points_in_vertex[4]
m_kde_adult_5.60.5.n5 <- m_kde_adult_5.60.5$points_in_vertex[5]
## ... and .nK.vec is the same set of IDs as a plain vector.
m_kde_adult_5.60.5.n1.vec <- as.vector(unlist(m_kde_adult_5.60.5.n1))
m_kde_adult_5.60.5.n2.vec <- as.vector(unlist(m_kde_adult_5.60.5.n2))
m_kde_adult_5.60.5.n3.vec <- as.vector(unlist(m_kde_adult_5.60.5.n3))
m_kde_adult_5.60.5.n4.vec <- as.vector(unlist(m_kde_adult_5.60.5.n4))
m_kde_adult_5.60.5.n5.vec <- as.vector(unlist(m_kde_adult_5.60.5.n5))

## Use the row IDs of each KDE Mapper vertex to pull the matching rows out of
## adult.one_hot_df4.
tda.m_kde_adult_5.60.5.n1.vec <- adult.one_hot_df4[m_kde_adult_5.60.5.n1.vec, ]
tda.m_kde_adult_5.60.5.n2.vec <- adult.one_hot_df4[m_kde_adult_5.60.5.n2.vec, ]
tda.m_kde_adult_5.60.5.n3.vec <- adult.one_hot_df4[m_kde_adult_5.60.5.n3.vec, ]
tda.m_kde_adult_5.60.5.n4.vec <- adult.one_hot_df4[m_kde_adult_5.60.5.n4.vec, ]
tda.m_kde_adult_5.60.5.n5.vec <- adult.one_hot_df4[m_kde_adult_5.60.5.n5.vec, ]
library(caret)

# Rebuild the modelling data frames (same steps as the Mapper preparation
# earlier in the script): append column 15 of the raw data as adult_df1 and
# take the positional column subsets.
adult_df1 <- adult[, 15]
adult.one_hot_df1 <- cbind(adult.one_hot_df, adult_df1)
adult.one_hot_df2 <- adult.one_hot_df1[, c(1, 11, 28, 64, 65, 66, 109)]
adult.one_hot_df3 <- adult.one_hot_df1[, c(1, 11, 28, 62:66)]
# Drop columns 109 and 110 (presumably the one-hot copies of the label; verify).
adult.one_hot_df4 <- adult.one_hot_df1[, -(109:110)]

# Draw one partition on adult_df1 with 70% of the rows assigned to training;
# list = FALSE returns the indices as a one-column matrix.
trainIndex <- createDataPartition(
  adult.one_hot_df4$adult_df1,
  p = 0.7,
  list = FALSE,
  times = 1
)

head(trainIndex)
##      Resample1
## [1,]         1
## [2,]         2
## [3,]         4
## [4,]         5
## [5,]         7
## [6,]         8
# Rows selected by the partition train the models; the remainder is held out.
adult.one_hot_df4Train <- adult.one_hot_df4[trainIndex, ]
adult.one_hot_df4Test <- adult.one_hot_df4[-trainIndex, ]
# Train control used as the basis for all models: 3-fold cross-validation.
fitControl <- trainControl(method = "cv", number = 3)
# Non-TDA-assisted baseline
# Candidate mtry values. mtry cannot exceed the number of predictors (every
# column except the response adult_df1); randomForest() resets larger values to
# that maximum with an "invalid mtry" warning. The original (1:20) * 50 grid is
# therefore capped and de-duplicated so the same model is not refitted for
# every out-of-range candidate.
rfGrid <- expand.grid(
  mtry = unique(pmin((1:20) * 50, ncol(adult.one_hot_df4Train) - 1L))
)
# Random forest
# `importance = TRUE` is randomForest()'s lower-case argument; the earlier
# `Importance = T` fell into `...` and had no effect. `preProcess` is spelled
# out rather than relying on partial matching of `preProc`.
# NOTE: the warnings and results recorded below predate these two fixes.
adultRfFit <- train(
  as.factor(adult_df1) ~ .,
  data = adult.one_hot_df4Train,
  method = "rf",
  importance = TRUE,
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
# Print the fitted caret model: resampling setup and accuracy/kappa per mtry.
adultRfFit
## Random Forest 
## 
## 22793 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 15196, 15195, 15195 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     50  0.8572372  0.5882934
##    100  0.8546924  0.5828444
##    150  0.8549557  0.5839639
##    200  0.8541660  0.5813642
##    250  0.8546925  0.5827775
##    300  0.8542976  0.5810137
##    350  0.8535957  0.5792854
##    400  0.8531569  0.5788817
##    450  0.8530691  0.5783394
##    500  0.8532884  0.5784096
##    550  0.8538589  0.5809612
##    600  0.8533324  0.5787809
##    650  0.8542099  0.5817356
##    700  0.8543415  0.5823679
##    750  0.8538588  0.5805136
##    800  0.8528498  0.5773478
##    850  0.8537712  0.5804901
##    900  0.8536833  0.5798539
##    950  0.8535079  0.5788895
##   1000  0.8543854  0.5818180
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold performance of the selected model.
adultRfFit$resample
##    Accuracy     Kappa Resample
## 1 0.8636304 0.6053166    Fold1
## 2 0.8575941 0.5924862    Fold3
## 3 0.8504870 0.5670776    Fold2
# Keep only the accuracy column (the first column) as a one-column data frame.
ad_rf_fit_re <- adultRfFit$resample["Accuracy"]


# List the components of the fitted model (the Length/Class/Mode table below).
summary(adultRfFit)
##                 Length Class      Mode     
## call                5  -none-     call     
## type                1  -none-     character
## predicted       22793  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           45586  matrix     numeric  
## oob.times       22793  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               22793  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               1  -none-     list
# Variable-importance plot of the 25 top-ranked features for the baseline model.
vip(adultRfFit, num_features = 25) +
  ggtitle("non-TDA-Assisted: RF")

# Predict the held-out test rows with the model fitted on the training rows.
predictions <- predict(adultRfFit, newdata = adult.one_hot_df4Test)

# Confusion matrix of the predictions against the true test labels, used to
# assess performance on the test data.
rf_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
rf_cf
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6929   891
##      >50K     487  1461
##                                           
##                Accuracy : 0.8589          
##                  95% CI : (0.8519, 0.8658)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5901          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9343          
##             Specificity : 0.6212          
##          Pos Pred Value : 0.8861          
##          Neg Pred Value : 0.7500          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7094          
##    Detection Prevalence : 0.8006          
##       Balanced Accuracy : 0.7778          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics; the accuracy (first element) is kept separately.
rf_cf$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.589271e-01   5.901123e-01   8.518658e-01   8.657737e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  4.976785e-132   1.862103e-27
rf_cf_ov_acc <- rf_cf$overall["Accuracy"]
# Per-class statistics; precision, recall and F1 (elements 5 to 7) are kept.
rf_cf$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9343312            0.6211735            0.8860614 
##       Neg Pred Value            Precision               Recall 
##            0.7500000            0.8860614            0.9343312 
##                   F1           Prevalence       Detection Rate 
##            0.9095563            0.7592138            0.7093571 
## Detection Prevalence    Balanced Accuracy 
##            0.8005733            0.7777523
rf_cf_pre_rec_f1 <- rf_cf$byClass[c("Precision", "Recall", "F1")]

## With TDA PCA filter: 5 intervals, 60% overlap, 5 bins
## Node 1

# Random forest fitted only on the rows that fall in PCA Mapper vertex 1.
# `importance = TRUE` is randomForest()'s lower-case argument; the earlier
# `Importance = T` fell into `...` and had no effect. `preProcess` is spelled
# out rather than relying on partial matching of `preProc`.
Adult_TDA_PC_5.60.5_n1_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n1.vec,
  method = "rf",
  importance = TRUE,
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
# Print the caret fit: resampling setup plus cross-validated accuracy and kappa
# for each candidate mtry.
# NOTE(review): the recorded run reports 108 predictors while the grid tries
# mtry = 50..1000, and randomForest warned "invalid mtry: reset to within valid
# range"; the larger grid values are presumably all fitting the same clamped
# model -- confirm against the definition of rfGrid.
Adult_TDA_PC_5.60.5_n1_RfFit0
## Random Forest 
## 
## 6560 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 4373, 4373, 4374 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     50  0.8972570  0.3024224
##    100  0.8967994  0.3162878
##    150  0.8981714  0.3326416
##    200  0.8981715  0.3262756
##    250  0.8972567  0.3187978
##    300  0.8987809  0.3319605
##    350  0.8990860  0.3353872
##    400  0.8992384  0.3345968
##    450  0.8971043  0.3250847
##    500  0.8992385  0.3320094
##    550  0.8977139  0.3245236
##    600  0.8984765  0.3302399
##    650  0.8981712  0.3289740
##    700  0.8983237  0.3306323
##    750  0.8971040  0.3238449
##    800  0.8977141  0.3227463
##    850  0.8969515  0.3251635
##    900  0.8981714  0.3270742
##    950  0.8981711  0.3325710
##   1000  0.8977139  0.3231951
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 500.
# Per-fold accuracy and kappa of the selected model.
Adult_TDA_PC_5.60.5_n1_RfFit0[["resample"]]
##    Accuracy     Kappa Resample
## 1 0.8930041 0.3253968    Fold1
## 2 0.9034767 0.3422705    Fold3
## 3 0.9012346 0.3283609    Fold2
# Keep the per-fold accuracies as a one-column data frame (first column of the
# resample table); rows stay in the order printed above.
ad_tda_pc_5.60.5_n1_rf_fit0_re <-
  Adult_TDA_PC_5.60.5_n1_RfFit0[["resample"]]["Accuracy"]


# List the components of the fitted model. In the recorded output below,
# `importanceSD` and `localImportance` have length 0, i.e. that run stored no
# importance standard errors or per-observation importances.
summary(Adult_TDA_PC_5.60.5_n1_RfFit0)
##                 Length Class      Mode     
## call                5  -none-     call     
## type                1  -none-     character
## predicted        6560  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           13120  matrix     numeric  
## oob.times        6560  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y                6560  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               1  -none-     list
# Variable-importance plot of the 25 top-ranked predictors of the node-1 fit.
# The title previously read "TDA-Assited"; the typo is corrected here.
vip(Adult_TDA_PC_5.60.5_n1_RfFit0, num_features = 25L) +
  ggtitle("Adult_TDA_PCA_5.60.5_n1_RfFit TDA-Assisted RF")

# Classify the test rows with the node-1 model.
pred0 <- predict(
  Adult_TDA_PC_5.60.5_n1_RfFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the observed test labels, then print
# the resulting confusion matrix and statistics.
# NOTE(review): the recorded output below shows 27% test accuracy with almost
# every row predicted as " >50K", against ~90% cross-validated accuracy --
# check that the test frame's columns and scaling match the node-1 training
# frame.
ad_tda_pc_5.60.5_n1_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n1_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K    331     8
##      >50K    7085  2344
##                                          
##                Accuracy : 0.2739         
##                  95% CI : (0.265, 0.2828)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 1              
##                                          
##                   Kappa : 0.0203         
##                                          
##  Mcnemar's Test P-Value : <2e-16         
##                                          
##             Sensitivity : 0.04463        
##             Specificity : 0.99660        
##          Pos Pred Value : 0.97640        
##          Neg Pred Value : 0.24859        
##              Prevalence : 0.75921        
##          Detection Rate : 0.03389        
##    Detection Prevalence : 0.03471        
##       Balanced Accuracy : 0.52062        
##                                          
##        'Positive' Class :  <=50K         
## 
# Prints the same confusion-matrix object a second time; the recorded output
# below is identical to the one printed just above.
ad_tda_pc_5.60.5_n1_rf_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K    331     8
##      >50K    7085  2344
##                                          
##                Accuracy : 0.2739         
##                  95% CI : (0.265, 0.2828)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 1              
##                                          
##                   Kappa : 0.0203         
##                                          
##  Mcnemar's Test P-Value : <2e-16         
##                                          
##             Sensitivity : 0.04463        
##             Specificity : 0.99660        
##          Pos Pred Value : 0.97640        
##          Neg Pred Value : 0.24859        
##              Prevalence : 0.75921        
##          Detection Rate : 0.03389        
##    Detection Prevalence : 0.03471        
##       Balanced Accuracy : 0.52062        
##                                          
##        'Positive' Class :  <=50K         
## 
# Overall statistics of the node-1 TDA-assisted model's confusion matrix.
ad_tda_pc_5.60.5_n1_rf_cf0[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##     0.27385340     0.02033811     0.26502734     0.28281448     0.75921376 
## AccuracyPValue  McnemarPValue 
##     1.00000000     0.00000000
# Keep the accuracy entry (the first element of `overall`) for the test-set
# comparison.
ad_tda_pc_5.60.5_n1_rf_cf0_ov_acc <-
  ad_tda_pc_5.60.5_n1_rf_cf0[["overall"]]["Accuracy"]
# Per-class statistics of the same confusion matrix.
ad_tda_pc_5.60.5_n1_rf_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##           0.04463323           0.99659864           0.97640118 
##       Neg Pred Value            Precision               Recall 
##           0.24859476           0.97640118           0.04463323 
##                   F1           Prevalence       Detection Rate 
##           0.08536428           0.75921376           0.03388616 
## Detection Prevalence    Balanced Accuracy 
##           0.03470516           0.52061593
# Keep precision, recall and F1 (elements 5 to 7 of `byClass`).
ad_tda_pc_5.60.5_n1_rf_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n1_rf_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers

### 3-fold diff

# Row-wise difference in per-fold accuracy: `ad_rf_fit_re` (presumably the
# non-TDA-assisted RF) minus the node-1 TDA-assisted fit. Negative values mean
# the TDA-assisted fit scored higher on that row.
# NOTE(review): rows are paired by position, not by fold label. The TDA fit's
# resample rows were recorded in the order Fold1, Fold3, Fold2 -- confirm that
# `ad_rf_fit_re` uses the same order.
diff_tda_pca_5.60.5_rf_n1_3_fold <-
  ad_rf_fit_re - ad_tda_pc_5.60.5_n1_rf_fit0_re
diff_tda_pca_5.60.5_rf_n1_3_fold
##      Accuracy
## 1 -0.02937373
## 2 -0.04588257
## 3 -0.05074760
## Bayesian Tests 3-fold diff

# Bayesian sign test on the fold differences with bounds -0.01 and 0.01.

bst_tda_pca_5.60.5_rf.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_rf_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_rf.n1_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the sign test. The ratio is Inf when
# probRight is 0, as in the recorded output below.

bst_tda_pca_5.60.5_rf.n1_3_fold_odds.left <-
  bst_tda_pca_5.60.5_rf.n1_3_fold[["probLeft"]] /
  bst_tda_pca_5.60.5_rf.n1_3_fold[["probRight"]]
bst_tda_pca_5.60.5_rf.n1_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the same differences and bounds.

bsr_tda_pca_5.60.5_rf.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_rf_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_rf.n1_3_fold
## $winLeft
## [1] 0.9915333
## 
## $winRope
## [1] 0.008466667
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the per-fold differences, bounds -0.01 / 0.01.
#
# The second argument is the assumed correlation between cross-validation
# results (named rho in the BayesianTestsML reference implementation -- confirm
# against the definition used in this project). The usual heuristic is
# rho = 1 / (number of folds). The differences here come from 3-fold CV, so
# 1/3 is used instead of the previous 0.1, which is the 10-fold value and
# understates the fold correlation.
#
# NOTE: the output recorded below was produced with 0.1 and needs to be
# regenerated.

bct_tda_pca_5.60.5_rf.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_rf_n1_3_fold),
  1 / 3,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_rf.n1_3_fold
## $left
## [1] 0.9748058
## 
## $rope
## [1] 0.01518858
## 
## $right
## [1] 0.01000558
# Plot the ROPE summary of the fold differences for the region [-0.01, 0.01].
plot(
  rope(
    as.matrix(diff_tda_pca_5.60.5_rf_n1_3_fold),
    c(-0.01, 0.01)
  )
)

# Bayes factor t-test (currently disabled):
# bf_tda_pca_5.60.5_rf.n1_3_fold <- ttestBF(x = as.matrix(diff_tda_pca_5.60.5_rf_n1_3_fold))
# bf_tda_pca_5.60.5_rf.n1_3_fold

# Frequentist one-sample t-test of the fold differences against a mean of 0.
t.test(as.matrix(diff_tda_pca_5.60.5_rf_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.60.5_rf_n1_3_fold)
## t = -6.4936, df = 2, p-value = 0.0229
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.06983126 -0.01417134
## sample estimates:
##  mean of x 
## -0.0420013
### Test set diff
# Difference of the two test-set accuracies (baseline minus node-1 fit).
# This is a single number, so every test below sees exactly one observation.
diff_tda_pca_5.60.5_rf.n1_test <-
  rf_cf_ov_acc - ad_tda_pc_5.60.5_n1_rf_cf0_ov_acc
diff_tda_pca_5.60.5_rf.n1_test
##  Accuracy 
## 0.5850737
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_pca_5.60.5_rf.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_rf.n1_test), -0.01, 0.01
)
bst_tda_pca_5.60.5_rf.n1_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.60.5_rf.n1_test_odds.left <- with(
  bst_tda_pca_5.60.5_rf.n1_test, probLeft / probRight
)
bst_tda_pca_5.60.5_rf.n1_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.60.5_rf.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_rf.n1_test), -0.01, 0.01
)
bsr_tda_pca_5.60.5_rf.n1_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1560333
## 
## $winRight
## [1] 0.8439667
# Bayesian Correlated Test
# The recorded result is NA for left/rope/right; presumably because a single
# difference gives no spread to estimate -- TODO confirm.

bct_tda_pca_5.60.5_rf.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_rf.n1_test), 0.1, -0.01, 0.01
)
bct_tda_pca_5.60.5_rf.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(as.matrix(diff_tda_pca_5.60.5_rf.n1_test),c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.60.5_rf.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_rf.n1_test)) #bf_tda_pca_5.60.5_rf.n1_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_rf.n1_test))

##Node2

# Fit the TDA-assisted random forest on the node-2 training data with caret,
# tuning `mtry` over `rfGrid` using the resampling scheme in `fitControl`.
#
# `importance = TRUE` is forwarded through train()'s `...` to randomForest().
# The previous spelling `Importance = T` did not match randomForest's
# lower-case `importance` argument, so it was silently ignored and no
# permutation importance was computed. `TRUE` replaces the reassignable `T`.
#
# NOTE(review): the recorded run warns "invalid mtry: reset to within valid
# range" and reports zero-variance columns during centering/scaling; `rfGrid`
# (defined elsewhere) appears to contain mtry values above the number of
# predictors -- confirm and trim the grid if so.
Adult_TDA_PC_5.60.5_n2_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n2.vec,
  importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProc = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
# Show the tuning summary (accuracy and kappa per candidate mtry) of the
# node-2 fit.
print(Adult_TDA_PC_5.60.5_n2_RfFit0)
## Random Forest 
## 
## 13933 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 9288, 9289, 9289 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     50  0.7492283  0.4989153
##    100  0.7448503  0.4901105
##    150  0.7447069  0.4898526
##    200  0.7444197  0.4892706
##    250  0.7437021  0.4877961
##    300  0.7437738  0.4879519
##    350  0.7419076  0.4842250
##    400  0.7426253  0.4856567
##    450  0.7432713  0.4869599
##    500  0.7435585  0.4875201
##    550  0.7435584  0.4875334
##    600  0.7453528  0.4910833
##    650  0.7439889  0.4883776
##    700  0.7452810  0.4909784
##    750  0.7442761  0.4889678
##    800  0.7433431  0.4870727
##    850  0.7439174  0.4882167
##    900  0.7434148  0.4872738
##    950  0.7441326  0.4886757
##   1000  0.7422666  0.4849815
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold resampling results of the selected node-2 model.
Adult_TDA_PC_5.60.5_n2_RfFit0$resample
##    Accuracy     Kappa Resample
## 1 0.7509150 0.5023416    Fold1
## 2 0.7508613 0.5022625    Fold3
## 3 0.7459087 0.4921418    Fold2
# Keep the per-fold accuracy as a one-column data frame. The column is selected
# by name rather than by position so a change in column order cannot silently
# pick up Kappa instead.
# NOTE(review): rows are in the order Fold1, Fold3, Fold2 (see printout); any
# later row-wise pairing with another model's folds should confirm the order.
ad_tda_pc_5.60.5_n2_rf_fit0_re <-
  Adult_TDA_PC_5.60.5_n2_RfFit0$resample["Accuracy"]


# Structure of the fitted object. In the recorded run `importanceSD` has
# length 0, i.e. no permutation importance was stored with that fit.
summary(Adult_TDA_PC_5.60.5_n2_RfFit0)
##                 Length Class      Mode     
## call                5  -none-     call     
## type                1  -none-     character
## predicted       13933  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           27866  matrix     numeric  
## oob.times       13933  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               13933  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               1  -none-     list
# Variable-importance plot of the 25 top-ranked predictors.
# The title previously misspelled "Assisted" as "Assited".
vip(Adult_TDA_PC_5.60.5_n2_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.60.5_n2_RfFit TDA-Assisted RF")

# Score the held-out test rows with the node-2 model.
pred0 <- predict(
  Adult_TDA_PC_5.60.5_n2_RfFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate predictions against the observed test labels.
# In the recorded run the accuracy (0.4662) is below the no-information rate
# (0.7592).
ad_tda_pc_5.60.5_n2_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n2_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   2212    10
##      >50K    5204  2342
##                                           
##                Accuracy : 0.4662          
##                  95% CI : (0.4563, 0.4762)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.1676          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.2983          
##             Specificity : 0.9957          
##          Pos Pred Value : 0.9955          
##          Neg Pred Value : 0.3104          
##              Prevalence : 0.7592          
##          Detection Rate : 0.2265          
##    Detection Prevalence : 0.2275          
##       Balanced Accuracy : 0.6470          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): this prints the same confusion matrix a second time; the
# object has not changed since it was created, so the print looks redundant.
print(ad_tda_pc_5.60.5_n2_rf_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   2212    10
##      >50K    5204  2342
##                                           
##                Accuracy : 0.4662          
##                  95% CI : (0.4563, 0.4762)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.1676          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.2983          
##             Specificity : 0.9957          
##          Pos Pred Value : 0.9955          
##          Neg Pred Value : 0.3104          
##              Prevalence : 0.7592          
##          Detection Rate : 0.2265          
##    Detection Prevalence : 0.2275          
##       Balanced Accuracy : 0.6470          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the node-2 confusion matrix.
ad_tda_pc_5.60.5_n2_rf_cf0[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.4662162      0.1676317      0.4562835      0.4761691      0.7592138 
## AccuracyPValue  McnemarPValue 
##      1.0000000      0.0000000
# The first entry of `overall` is the accuracy; keep it for the test-set
# comparison.
ad_tda_pc_5.60.5_n2_rf_cf0_ov_acc <- ad_tda_pc_5.60.5_n2_rf_cf0[["overall"]][1]

# Per-class statistics of the node-2 confusion matrix.
ad_tda_pc_5.60.5_n2_rf_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##            0.2982740            0.9957483            0.9954995 
##       Neg Pred Value            Precision               Recall 
##            0.3103631            0.9954995            0.2982740 
##                   F1           Prevalence       Detection Rate 
##            0.4590164            0.7592138            0.2264537 
## Detection Prevalence    Balanced Accuracy 
##            0.2274775            0.6470112
# Entries 5:7 of `byClass` are Precision, Recall and F1 (see the printout).
ad_tda_pc_5.60.5_n2_rf_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n2_rf_cf0[["byClass"]][5:7]

###### Bayesian comparison of the non-TDA-assisted RF vs. the TDA-assisted RF (node 2)

### 3-fold diff

# Row-wise difference of the resampled accuracies (baseline minus node-2 fit);
# a positive entry means the baseline scored higher on that row.
# NOTE(review): the subtraction pairs rows by position, and the node-2 folds
# were recorded in the order Fold1, Fold3, Fold2 -- confirm that the baseline
# uses the same order.
diff_tda_pca_5.60.5_rf_n2_3_fold <-
  ad_rf_fit_re - ad_tda_pc_5.60.5_n2_rf_fit0_re
diff_tda_pca_5.60.5_rf_n2_3_fold
##    Accuracy
## 1 0.1127154
## 2 0.1067328
## 3 0.1045783
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# The trailing -0.01 / 0.01 are presumably the ROPE bounds (the same interval
# is passed to rope() below) -- confirm against the function definition.

bst_tda_pca_5.60.5_rf.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_rf_n2_3_fold), -0.01, 0.01
)
bst_tda_pca_5.60.5_rf.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test 
# Ratio probLeft / probRight.

bst_tda_pca_5.60.5_rf.n2_3_fold_odds.left <- with(
  bst_tda_pca_5.60.5_rf.n2_3_fold, probLeft / probRight
)
bst_tda_pca_5.60.5_rf.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.60.5_rf.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_rf_n2_3_fold), -0.01, 0.01
)
bsr_tda_pca_5.60.5_rf.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.009066667
## 
## $winRight
## [1] 0.9909333
# Bayesian Correlated Test
# NOTE(review): the second argument (0.1) is presumably the fold correlation
# rho; the resampling here is 3-fold, for which 1/k = 1/3 is the customary
# choice -- confirm.

bct_tda_pca_5.60.5_rf.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_rf_n2_3_fold), 0.1, -0.01, 0.01
)
bct_tda_pca_5.60.5_rf.n2_3_fold
## $left
## [1] 0.0002833915
## 
## $rope
## [1] 0.0001273035
## 
## $right
## [1] 0.9995893
# Rope Plot
plot(rope(as.matrix(diff_tda_pca_5.60.5_rf_n2_3_fold), c(-0.01, 0.01)))

#BayesFactor
#bf_tda_pca_5.60.5_rf.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_rf_n2_3_fold))
#bf_tda_pca_5.60.5_rf.n2_3_fold

#t_test
# Frequentist one-sample t-test of the three fold differences against 0.
t.test(as.matrix(diff_tda_pca_5.60.5_rf_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.60.5_rf_n2_3_fold)
## t = 44.373, df = 2, p-value = 0.0005075
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.09753574 0.11848190
## sample estimates:
## mean of x 
## 0.1080088
### Test set diff
# Difference of the two test-set accuracies (baseline minus node-2 fit).
# This is a single number, so every test below sees exactly one observation.
diff_tda_pca_5.60.5_rf.n2_test <-
  rf_cf_ov_acc - ad_tda_pc_5.60.5_n2_rf_cf0_ov_acc
diff_tda_pca_5.60.5_rf.n2_test
##  Accuracy 
## 0.3927109
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_pca_5.60.5_rf.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_rf.n2_test), -0.01, 0.01
)
bst_tda_pca_5.60.5_rf.n2_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.60.5_rf.n2_test_odds.left <- with(
  bst_tda_pca_5.60.5_rf.n2_test, probLeft / probRight
)
bst_tda_pca_5.60.5_rf.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.60.5_rf.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_rf.n2_test), -0.01, 0.01
)
bsr_tda_pca_5.60.5_rf.n2_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1582
## 
## $winRight
## [1] 0.8418
# Bayesian Correlated Test
# The recorded result is NA for left/rope/right; presumably because a single
# difference gives no spread to estimate -- TODO confirm.

bct_tda_pca_5.60.5_rf.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_rf.n2_test), 0.1, -0.01, 0.01
)
bct_tda_pca_5.60.5_rf.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(as.matrix(diff_tda_pca_5.60.5_rf.n2_test),c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.60.5_rf.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_rf.n2_test)) #bf_tda_pca_5.60.5_rf.n2_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_rf.n2_test))

## Node3

# Train a caret random forest (method = 'rf') on the node-3 data set
# `tda.m_adult_5.60.5.n3.vec`, predicting `adult_df1` from all other columns.
# Predictors are centered and scaled; mtry is tuned over `rfGrid` and the
# model with the highest resampled Accuracy is kept. `fitControl` and
# `rfGrid` are defined earlier in the file.
#
# Fix: the original passed `Importance = T`. Argument names are
# case-sensitive, so that spelling matched no randomForest() parameter and
# was dropped through `...`; the recorded summary of this model shows
# importanceSD as NULL. `importance = TRUE` actually enables
# permutation importance (at extra fitting cost). `preProcess` is also
# spelled out in full instead of relying on partial matching of `preProc`.
#
# NOTE(review): the recorded run reports 108 predictors, yet the tuning
# results list mtry values from 50 up to 1000. Grid values above the
# predictor count trigger "invalid mtry: reset to within valid range" and
# refit the same model repeatedly -- consider trimming `rfGrid` where it is
# defined.
Adult_TDA_PC_5.60.5_n3_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n3.vec,
  importance = TRUE,
  method = 'rf',
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c('center', 'scale'),
  metric = 'Accuracy'
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
# Print the fitted caret object: sample/predictor counts, the resampling
# scheme, Accuracy and Kappa per mtry value, and the selected mtry.
Adult_TDA_PC_5.60.5_n3_RfFit0
## Random Forest 
## 
## 15744 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 10495, 10496, 10497 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     50  0.8067840  0.4308487
##    100  0.8041797  0.4259349
##    150  0.8035445  0.4243874
##    200  0.8021473  0.4211533
##    250  0.8041162  0.4252953
##    300  0.8037350  0.4249850
##    350  0.8037352  0.4243853
##    400  0.8027825  0.4231159
##    450  0.8034812  0.4241709
##    500  0.8017024  0.4181958
##    550  0.8031636  0.4238124
##    600  0.8041162  0.4257059
##    650  0.8032268  0.4229797
##    700  0.8039256  0.4254003
##    750  0.8042432  0.4270190
##    800  0.8028459  0.4233487
##    850  0.8037352  0.4249111
##    900  0.8050052  0.4286000
##    950  0.8021472  0.4207175
##   1000  0.8037986  0.4250153
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold Accuracy and Kappa of the selected node-3 model.
Adult_TDA_PC_5.60.5_n3_RfFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8016765 0.4229297    Fold1
## 2 0.8088431 0.4303138    Fold3
## 3 0.8098323 0.4393027    Fold2
# Keep the per-fold Accuracy column (a one-column data frame) for the fold-wise
# comparison with the other classifier.
# NOTE(review): the rows above are listed as Fold1, Fold3, Fold2; any
# position-wise comparison should use the same fold order on both sides.
ad_tda_pc_5.60.5_n3_rf_fit0_re <-
  Adult_TDA_PC_5.60.5_n3_RfFit0$resample["Accuracy"]


# Component overview of the fitted object (lengths, classes and modes).
summary(Adult_TDA_PC_5.60.5_n3_RfFit0)
##                 Length Class      Mode     
## call                5  -none-     call     
## type                1  -none-     character
## predicted       15744  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           31488  matrix     numeric  
## oob.times       15744  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               15744  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               1  -none-     list
# Variable-importance plot of the 25 top-ranked predictors of the node-3 model.
# Fix: the plot title read "TDA-Assited"; corrected to "TDA-Assisted".
vip(Adult_TDA_PC_5.60.5_n3_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.60.5_n3_RfFit TDA-Assisted RF")

# Predict the outcome for the held-out test data with the node-3 model.
pred0 <- predict(
  Adult_TDA_PC_5.60.5_n3_RfFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of the predictions against the observed test labels,
# used to assess performance on the test data.
ad_tda_pc_5.60.5_n3_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n3_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   5030   868
##      >50K    2386  1484
##                                           
##                Accuracy : 0.6669          
##                  95% CI : (0.6574, 0.6762)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.2534          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.6783          
##             Specificity : 0.6310          
##          Pos Pred Value : 0.8528          
##          Neg Pred Value : 0.3835          
##              Prevalence : 0.7592          
##          Detection Rate : 0.5149          
##    Detection Prevalence : 0.6038          
##       Balanced Accuracy : 0.6546          
##                                           
##        'Positive' Class :  <=50K          
## 
# Second print of the same confusion-matrix object; it repeats the output
# already produced when the object was created.
ad_tda_pc_5.60.5_n3_rf_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   5030   868
##      >50K    2386  1484
##                                           
##                Accuracy : 0.6669          
##                  95% CI : (0.6574, 0.6762)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.2534          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.6783          
##             Specificity : 0.6310          
##          Pos Pred Value : 0.8528          
##          Neg Pred Value : 0.3835          
##              Prevalence : 0.7592          
##          Detection Rate : 0.5149          
##    Detection Prevalence : 0.6038          
##       Balanced Accuracy : 0.6546          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the node-3 confusion matrix.
ad_tda_pc_5.60.5_n3_rf_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   6.668714e-01   2.533811e-01   6.574245e-01   6.762187e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.000000e+00  8.053881e-156
# Keep the overall test-set accuracy (first element, named "Accuracy").
ad_tda_pc_5.60.5_n3_rf_cf0_ov_acc <-
  ad_tda_pc_5.60.5_n3_rf_cf0$overall["Accuracy"]
# Per-class statistics of the node-3 confusion matrix.
ad_tda_pc_5.60.5_n3_rf_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.6782632            0.6309524            0.8528315 
##       Neg Pred Value            Precision               Recall 
##            0.3834625            0.8528315            0.6782632 
##                   F1           Prevalence       Detection Rate 
##            0.7555956            0.7592138            0.5149468 
## Detection Prevalence    Balanced Accuracy 
##            0.6038084            0.6546078
# Keep elements 5 to 7 of byClass: Precision, Recall and F1.
ad_tda_pc_5.60.5_n3_rf_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n3_rf_cf0$byClass[c("Precision", "Recall", "F1")]

###### Initial Bayesian tests: non-TDA-assisted RF vs. TDA-assisted RF classifiers

### 3-fold difference

# Per-fold accuracy of the non-TDA RF minus per-fold accuracy of the node-3
# TDA-assisted RF. The two one-column data frames are subtracted row by row.
# NOTE(review): rows are paired by position -- confirm that both resample
# tables list the folds in the same order.
diff_tda_pca_5.60.5_rf_n3_3_fold <-
  ad_rf_fit_re - ad_tda_pc_5.60.5_n3_rf_fit0_re
diff_tda_pca_5.60.5_rf_n3_3_fold
##     Accuracy
## 1 0.06195387
## 2 0.04875096
## 3 0.04065465
## Bayesian tests on the 3-fold differences

# Bayesian sign test. The -0.01 / 0.01 arguments are presumably the ROPE
# bounds (the result reports left / rope / right probabilities).
bst_tda_pca_5.60.5_rf.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_rf_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_rf.n3_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds of "left" versus "right" from the Bayesian sign test:
# probLeft divided by probRight.
bst_tda_pca_5.60.5_rf.n3_3_fold_odds.left <- with(
  bst_tda_pca_5.60.5_rf.n3_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.60.5_rf.n3_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the 3-fold differences, called with the same
# -0.01 / 0.01 bounds as the sign test.
bsr_tda_pca_5.60.5_rf.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_rf_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_rf.n3_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.0095
## 
## $winRight
## [1] 0.9905
# Correlated Bayesian t-test on the node-3 3-fold differences. The 0.1
# argument is presumably the correlation (rho) and -0.01 / 0.01 the ROPE
# bounds -- confirm against the function definition.
bct_tda_pca_5.60.5_rf.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_rf_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
# Fix: print the node-3 result that was just computed. The original printed
# `bct_tda_pca_5.60.5_rf.n2_3_fold` (the node-2 object) here, so the recorded
# output that follows this statement shows node-2 values until the script is
# re-run.
bct_tda_pca_5.60.5_rf.n3_3_fold
## $left
## [1] 0.0002833915
## 
## $rope
## [1] 0.0001273035
## 
## $right
## [1] 0.9995893
# ROPE plot for the n3 3-fold accuracy differences, using the range
# [-0.01, 0.01] passed to rope().
diff_tda_pca_5.60.5_rf_n3_3_fold |>
  as.matrix() |>
  rope(c(-0.01, 0.01)) |>
  plot()

# Bayes factor t-test (currently disabled)
#bf_tda_pca_5.60.5_rf.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_rf_n3_3_fold))
#bf_tda_pca_5.60.5_rf.n3_3_fold

# One-sample t-test of the same fold differences (null: mean difference is 0).
as.matrix(diff_tda_pca_5.60.5_rf_n3_3_fold) |>
  t.test()
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.60.5_rf_n3_3_fold)
## t = 8.1282, df = 2, p-value = 0.0148
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.02374582 0.07716050
## sample estimates:
##  mean of x 
## 0.05045316
### Test-set difference
# Test-set accuracy stored in rf_cf_ov_acc (presumably the non-TDA RF) minus
# the test-set accuracy of the n3 TDA-assisted RF. The result is one number.
diff_tda_pca_5.60.5_rf.n3_test <-
  rf_cf_ov_acc - ad_tda_pc_5.60.5_n3_rf_cf0_ov_acc
diff_tda_pca_5.60.5_rf.n3_test
##  Accuracy 
## 0.1920557
## Bayesian tests on the test-set difference

# Bayesian sign test. The -0.01 / 0.01 arguments are presumably the ROPE
# bounds (the result reports left / rope / right probabilities).
# NOTE(review): the input is a single difference, not one value per fold.
bst_tda_pca_5.60.5_rf.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_rf.n3_test),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_rf.n3_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the Bayesian sign test:
# probLeft divided by probRight.
bst_tda_pca_5.60.5_rf.n3_test_odds.left <- with(
  bst_tda_pca_5.60.5_rf.n3_test,
  probLeft / probRight
)
bst_tda_pca_5.60.5_rf.n3_test_odds.left
## [1] 0
# Bayesian signed-rank test on the test-set difference, called with the same
# -0.01 / 0.01 bounds as the sign test.
bsr_tda_pca_5.60.5_rf.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_rf.n3_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_rf.n3_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1591667
## 
## $winRight
## [1] 0.8408333
# Correlated Bayesian t-test on the test-set difference. The 0.1 argument is
# presumably the correlation (rho) -- confirm against the function definition.
# NOTE(review): with a single difference as input, the recorded run returned
# NA for left, rope and right.
bct_tda_pca_5.60.5_rf.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_rf.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_rf.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(as.matrix(diff_tda_pca_5.60.5_rf.n3_test),c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.60.5_rf.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_rf.n3_test)) #bf_tda_pca_5.60.5_rf.n3_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_rf.n3_test))

## Node 4

# Tune a random forest with caret on the node-4 TDA data, selecting mtry by
# resampled accuracy after centering and scaling the predictors.
#
# `importance = TRUE` is forwarded to randomForest(). The previous spelling
# `Importance = T` did not match that argument and was silently ignored, so
# no permutation importance was computed (importanceSD was NULL in the
# summary of the fitted model).
#
# NOTE(review): rfGrid is defined elsewhere; the recorded "invalid mtry"
# warnings indicate it requests mtry values above the number of predictors,
# which randomForest resets to the valid range -- consider trimming the grid.
Adult_TDA_PC_5.60.5_n4_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n4.vec,
  method = "rf",
  importance = TRUE,
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
Adult_TDA_PC_5.60.5_n4_RfFit0
## Random Forest 
## 
## 19829 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 13220, 13219, 13219 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     50  0.9482072  0.4322754
##    100  0.9476525  0.4311878
##    150  0.9478542  0.4352071
##    200  0.9469969  0.4260941
##    250  0.9476525  0.4301942
##    300  0.9473499  0.4301530
##    350  0.9469969  0.4257237
##    400  0.9472995  0.4313980
##    450  0.9471482  0.4269776
##    500  0.9474003  0.4300012
##    550  0.9471482  0.4285603
##    600  0.9474003  0.4305603
##    650  0.9475012  0.4293020
##    700  0.9475516  0.4338274
##    750  0.9470473  0.4280267
##    800  0.9469465  0.4259176
##    850  0.9471482  0.4270210
##    900  0.9475516  0.4312180
##    950  0.9471986  0.4288736
##   1000  0.9472491  0.4281409
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
Adult_TDA_PC_5.60.5_n4_RfFit0$resample
##    Accuracy     Kappa Resample
## 1 0.9490089 0.4261855    Fold1
## 2 0.9456884 0.4148155    Fold3
## 3 0.9499244 0.4558253    Fold2
# Keep the per-fold Accuracy column (as a one-column data frame) for the
# fold-wise comparison further down.
ad_tda_pc_5.60.5_n4_rf_fit0_re <-
  Adult_TDA_PC_5.60.5_n4_RfFit0[["resample"]]["Accuracy"]

# Component overview of the fitted model
summary(object = Adult_TDA_PC_5.60.5_n4_RfFit0)
##                 Length Class      Mode     
## call                5  -none-     call     
## type                1  -none-     character
## predicted       19829  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           39658  matrix     numeric  
## oob.times       19829  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               19829  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               1  -none-     list
# Variable-importance plot (top 25 features) for the node-4 model.
# The title previously read "TDA-Assited"; the typo is corrected here.
vip(Adult_TDA_PC_5.60.5_n4_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.60.5_n4_RfFit TDA-Assisted RF")

# Predict the outcome for the test data with the node-4 model
pred0 <- predict(Adult_TDA_PC_5.60.5_n4_RfFit0, newdata = adult.one_hot_df4Test)

# Confusion matrix of the predictions against the observed test labels
ad_tda_pc_5.60.5_n4_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n4_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7413  1614
##      >50K       3   738
##                                           
##                Accuracy : 0.8345          
##                  95% CI : (0.8269, 0.8418)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.409           
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9996          
##             Specificity : 0.3138          
##          Pos Pred Value : 0.8212          
##          Neg Pred Value : 0.9960          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7589          
##    Detection Prevalence : 0.9241          
##       Balanced Accuracy : 0.6567          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.60.5_n4_rf_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7413  1614
##      >50K       3   738
##                                           
##                Accuracy : 0.8345          
##                  95% CI : (0.8269, 0.8418)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.409           
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9996          
##             Specificity : 0.3138          
##          Pos Pred Value : 0.8212          
##          Neg Pred Value : 0.9960          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7589          
##    Detection Prevalence : 0.9241          
##       Balanced Accuracy : 0.6567          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.60.5_n4_rf_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.344595e-01   4.090247e-01   8.269383e-01   8.417807e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.161054e-73   0.000000e+00
# Store the overall test-set accuracy (the element named "Accuracy"), then
# show the per-class statistics.
ad_tda_pc_5.60.5_n4_rf_cf0_ov_acc <-
  ad_tda_pc_5.60.5_n4_rf_cf0[["overall"]]["Accuracy"]
ad_tda_pc_5.60.5_n4_rf_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9995955            0.3137755            0.8212031 
##       Neg Pred Value            Precision               Recall 
##            0.9959514            0.8212031            0.9995955 
##                   F1           Prevalence       Detection Rate 
##            0.9016603            0.7592138            0.7589066 
## Detection Prevalence    Balanced Accuracy 
##            0.9241400            0.6566855
# Precision, recall and F1 for the node-4 model on the test set
ad_tda_pc_5.60.5_n4_rf_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n4_rf_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Initial Bayesian tests: non-TDA-assisted RF vs. TDA-assisted RF classifiers

### 3-fold difference
# NOTE(review): the subtraction pairs rows by position. The node-4 resample
# table lists its folds as Fold1, Fold3, Fold2 -- verify that ad_rf_fit_re
# uses the same fold order.
diff_tda_pca_5.60.5_rf_n4_3_fold <-
  ad_rf_fit_re - ad_tda_pc_5.60.5_n4_rf_fit0_re
diff_tda_pca_5.60.5_rf_n4_3_fold
##      Accuracy
## 1 -0.08537855
## 2 -0.08809425
## 3 -0.09943739
## Bayesian tests on the 3-fold accuracy differences

# Bayesian sign test; the trailing -0.01 / 0.01 arguments are presumably the
# ROPE bounds (confirm against the BayesianSignTest definition).
bst_tda_pca_5.60.5_rf.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_rf_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_rf.n4_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds of "left" over "right" from the Bayesian sign test result; a probRight
# of 0 makes this ratio Inf.
bst_tda_pca_5.60.5_rf.n4_3_fold_odds.left <-
  bst_tda_pca_5.60.5_rf.n4_3_fold[["probLeft"]] /
  bst_tda_pca_5.60.5_rf.n4_3_fold[["probRight"]]
bst_tda_pca_5.60.5_rf.n4_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the node-4 fold-wise differences
bsr_tda_pca_5.60.5_rf.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_rf_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_rf.n4_3_fold
## $winLeft
## [1] 0.9915667
## 
## $winRope
## [1] 0.008433333
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the node-4 fold-wise differences; 0.1 is
# presumably the assumed correlation and -0.01 / 0.01 the ROPE bounds
# (TODO confirm against the correlatedBayesianTtest definition).
bct_tda_pca_5.60.5_rf.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_rf_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_rf.n4_3_fold
## $left
## [1] 0.9981255
## 
## $rope
## [1] 0.00066663
## 
## $right
## [1] 0.001207875
# ROPE plot for the node-4 fold-wise accuracy differences, using -0.01 and
# 0.01 as the ROPE bounds.
plot(
  rope(
    as.matrix(diff_tda_pca_5.60.5_rf_n4_3_fold),
    range = c(-0.01, 0.01)
  )
)

# Bayes factor (disabled)
# bf_tda_pca_5.60.5_rf.n4_3_fold <- ttestBF(x = as.matrix(diff_tda_pca_5.60.5_rf_n4_3_fold))
# bf_tda_pca_5.60.5_rf.n4_3_fold

# One-sample t-test of the same differences against a true mean of 0
t.test(x = as.matrix(diff_tda_pca_5.60.5_rf_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.60.5_rf_n4_3_fold)
## t = -21.128, df = 2, p-value = 0.002233
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.10949571 -0.07244441
## sample estimates:
##   mean of x 
## -0.09097006
### Test-set difference: rf_cf_ov_acc minus the node-4 TDA-assisted RF accuracy
diff_tda_pca_5.60.5_rf.n4_test <-
  rf_cf_ov_acc - ad_tda_pc_5.60.5_n4_rf_cf0_ov_acc
diff_tda_pca_5.60.5_rf.n4_test
##   Accuracy 
## 0.02446765
## Bayesian Tests Test set diff

# Bayesian Sign Test
# NOTE(review): the input is a single test-set accuracy difference (n = 1), so
# the probabilities below rest on one observation -- presumably the prior
# carries much of the weight; interpret with care. The -0.01 / 0.01 arguments
# are presumably the ROPE bounds (confirm against the BayesianSignTest
# definition).

bst_tda_pca_5.60.5_rf.n4_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.60.5_rf.n4_test),-0.01,0.01)
bst_tda_pca_5.60.5_rf.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" over "right" taken from the Bayesian sign test result
bst_tda_pca_5.60.5_rf.n4_test_odds.left <-
  bst_tda_pca_5.60.5_rf.n4_test[["probLeft"]] /
  bst_tda_pca_5.60.5_rf.n4_test[["probRight"]]
bst_tda_pca_5.60.5_rf.n4_test_odds.left
## [1] 0
# Bayesian signed-rank test on the single test-set difference.
# NOTE(review): the recorded results of this test vary slightly between
# otherwise similar sections of this file, which suggests random sampling
# inside BayesianSignedRank -- confirm, and consider fixing a seed.
bsr_tda_pca_5.60.5_rf.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_rf.n4_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_rf.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1622
## 
## $winRight
## [1] 0.8378
# Bayesian Correlated Test
# NOTE(review): all three recorded results are NA. The input is a single
# difference, so presumably no variance can be estimated -- confirm against
# the correlatedBayesianTtest definition. The 0.1 argument is presumably the
# assumed correlation and -0.01 / 0.01 the ROPE bounds (TODO confirm).

bct_tda_pca_5.60.5_rf.n4_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.60.5_rf.n4_test),0.1,-0.01,0.01)
bct_tda_pca_5.60.5_rf.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_rf.n4_test))

#BayesFactor
#bf_tda_pca_5.60.5_rf.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_rf.n4_test)) #bf_tda_pca_5.60.5_rf.n4_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_rf.n4_test))

## Node 5

# Tune a random forest with caret on the node-5 TDA data, selecting mtry by
# resampled accuracy after centering and scaling the predictors.
#
# `importance = TRUE` is forwarded to randomForest(). The previous spelling
# `Importance = T` did not match that argument and was silently ignored, so
# no permutation importance was computed (importanceSD was NULL in the
# summary of the fitted model).
#
# NOTE(review): rfGrid is defined elsewhere; the recorded "invalid mtry"
# warnings indicate it requests mtry values above the number of predictors,
# which randomForest resets to the valid range -- consider trimming the grid.
Adult_TDA_PC_5.60.5_n5_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n5.vec,
  method = "rf",
  importance = TRUE,
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
Adult_TDA_PC_5.60.5_n5_RfFit0
## Random Forest 
## 
## 16508 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 11005, 11006, 11005 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     50  0.9930943  0.2856353
##    100  0.9929731  0.2966272
##    150  0.9928520  0.2924513
##    200  0.9927308  0.2796054
##    250  0.9928520  0.2924513
##    300  0.9928520  0.2924513
##    350  0.9927914  0.2833461
##    400  0.9927914  0.2910408
##    450  0.9928520  0.2924513
##    500  0.9927914  0.2904632
##    550  0.9927914  0.2833461
##    600  0.9928520  0.2924513
##    650  0.9928520  0.2924513
##    700  0.9927914  0.2904632
##    750  0.9929125  0.2945048
##    800  0.9927308  0.2814734
##    850  0.9928520  0.2924513
##    900  0.9928520  0.2924513
##    950  0.9928520  0.2924513
##   1000  0.9927914  0.2815935
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
Adult_TDA_PC_5.60.5_n5_RfFit0$resample
##    Accuracy     Kappa Resample
## 1 0.9923678 0.2197383    Fold1
## 2 0.9934581 0.3313532    Fold3
## 3 0.9934569 0.3058143    Fold2
# Keep the per-fold Accuracy column (as a one-column data frame) for the
# fold-wise comparison further down.
ad_tda_pc_5.60.5_n5_rf_fit0_re <-
  Adult_TDA_PC_5.60.5_n5_RfFit0[["resample"]]["Accuracy"]

# Component overview of the fitted model
summary(object = Adult_TDA_PC_5.60.5_n5_RfFit0)
##                 Length Class      Mode     
## call                5  -none-     call     
## type                1  -none-     character
## predicted       16508  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           33016  matrix     numeric  
## oob.times       16508  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               16508  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               1  -none-     list
# Variable-importance plot (top 25 features) for the node-5 model.
# The title previously read "TDA-Assited"; the typo is corrected here.
vip(Adult_TDA_PC_5.60.5_n5_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.60.5_n5_RfFit TDA-Assisted RF")

# Predict the outcome for the test data with the node-5 model
pred0 <- predict(Adult_TDA_PC_5.60.5_n5_RfFit0, newdata = adult.one_hot_df4Test)

# Confusion matrix of the predictions against the observed test labels
ad_tda_pc_5.60.5_n5_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n5_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7412  2057
##      >50K       4   295
##                                           
##                Accuracy : 0.789           
##                  95% CI : (0.7808, 0.7971)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1.522e-12       
##                                           
##                   Kappa : 0.1779          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9995          
##             Specificity : 0.1254          
##          Pos Pred Value : 0.7828          
##          Neg Pred Value : 0.9866          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7588          
##    Detection Prevalence : 0.9694          
##       Balanced Accuracy : 0.5624          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.60.5_n5_rf_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7412  2057
##      >50K       4   295
##                                           
##                Accuracy : 0.789           
##                  95% CI : (0.7808, 0.7971)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1.522e-12       
##                                           
##                   Kappa : 0.1779          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9995          
##             Specificity : 0.1254          
##          Pos Pred Value : 0.7828          
##          Neg Pred Value : 0.9866          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7588          
##    Detection Prevalence : 0.9694          
##       Balanced Accuracy : 0.5624          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.60.5_n5_rf_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.890049e-01   1.779051e-01   7.807768e-01   7.970604e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.522100e-12   0.000000e+00
# Store the overall test-set accuracy (the element named "Accuracy"), then
# show the per-class statistics.
ad_tda_pc_5.60.5_n5_rf_cf0_ov_acc <-
  ad_tda_pc_5.60.5_n5_rf_cf0[["overall"]]["Accuracy"]
ad_tda_pc_5.60.5_n5_rf_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9994606            0.1254252            0.7827648 
##       Neg Pred Value            Precision               Recall 
##            0.9866221            0.7827648            0.9994606 
##                   F1           Prevalence       Detection Rate 
##            0.8779390            0.7592138            0.7588043 
## Detection Prevalence    Balanced Accuracy 
##            0.9693898            0.5624429
# Precision, recall and F1 for the node-5 model on the test set
ad_tda_pc_5.60.5_n5_rf_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n5_rf_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Initial Bayesian tests: non-TDA-assisted RF vs. TDA-assisted RF classifiers

### 3-fold difference
# NOTE(review): the subtraction pairs rows by position. The node-5 resample
# table lists its folds as Fold1, Fold3, Fold2 -- verify that ad_rf_fit_re
# uses the same fold order.
diff_tda_pca_5.60.5_rf_n5_3_fold <-
  ad_rf_fit_re - ad_tda_pc_5.60.5_n5_rf_fit0_re
diff_tda_pca_5.60.5_rf_n5_3_fold
##     Accuracy
## 1 -0.1287374
## 2 -0.1358640
## 3 -0.1429700
## Bayesian tests on the 3-fold accuracy differences

# Bayesian sign test; the trailing -0.01 / 0.01 arguments are presumably the
# ROPE bounds (confirm against the BayesianSignTest definition).
bst_tda_pca_5.60.5_rf.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_rf_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_rf.n5_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds of "left" over "right" from the Bayesian sign test result; a probRight
# of 0 makes this ratio Inf.
bst_tda_pca_5.60.5_rf.n5_3_fold_odds.left <-
  bst_tda_pca_5.60.5_rf.n5_3_fold[["probLeft"]] /
  bst_tda_pca_5.60.5_rf.n5_3_fold[["probRight"]]
bst_tda_pca_5.60.5_rf.n5_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the node-5 fold-wise differences
bsr_tda_pca_5.60.5_rf.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_rf_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_rf.n5_3_fold
## $winLeft
## [1] 0.9913
## 
## $winRope
## [1] 0.0087
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the node-5 fold-wise differences; 0.1 is
# presumably the assumed correlation and -0.01 / 0.01 the ROPE bounds
# (TODO confirm against the correlatedBayesianTtest definition).
bct_tda_pca_5.60.5_rf.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_rf_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_rf.n5_3_fold
## $left
## [1] 0.9992911
## 
## $rope
## [1] 0.0001808052
## 
## $right
## [1] 0.0005281393
# ROPE plot for the node-5 fold-wise accuracy differences, using -0.01 and
# 0.01 as the ROPE bounds. The differences are converted with as.matrix()
# so that rope() receives the same kind of input as in the node-3 and
# node-4 analyses (this call previously passed the data frame directly).
plot(
  rope(
    as.matrix(diff_tda_pca_5.60.5_rf_n5_3_fold),
    range = c(-0.01, 0.01)
  )
)

# Bayes factor (disabled)
# bf_tda_pca_5.60.5_rf.n5_3_fold <- ttestBF(x = as.matrix(diff_tda_pca_5.60.5_rf_n5_3_fold))
# bf_tda_pca_5.60.5_rf.n5_3_fold

# One-sample t-test of the same differences against a true mean of 0
t.test(x = as.matrix(diff_tda_pca_5.60.5_rf_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.60.5_rf_n5_3_fold)
## t = -33.067, df = 2, p-value = 0.0009133
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1535349 -0.1181793
## sample estimates:
##  mean of x 
## -0.1358571
### Test set diff
# Difference of the two test-set accuracy values (first minus second): a
# single number, so every test below runs on exactly one observation.
diff_tda_pca_5.60.5_rf.n5_test <-
  rf_cf_ov_acc - ad_tda_pc_5.60.5_n5_rf_cf0_ov_acc
diff_tda_pca_5.60.5_rf.n5_test
##   Accuracy 
## 0.06992219
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_pca_5.60.5_rf.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_rf.n5_test),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_rf.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 
# Ratio of probLeft to probRight (0 / 0.5 in the recorded run).

bst_tda_pca_5.60.5_rf.n5_test_odds.left <-
  bst_tda_pca_5.60.5_rf.n5_test$probLeft /
  bst_tda_pca_5.60.5_rf.n5_test$probRight
bst_tda_pca_5.60.5_rf.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.60.5_rf.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_rf.n5_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_rf.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1624333
## 
## $winRight
## [1] 0.8375667
# Bayesian Correlated Test
# Returns NA for every region in the recorded run; presumably because a
# spread cannot be estimated from a single difference -- TODO confirm.

bct_tda_pca_5.60.5_rf.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_rf.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_rf.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_rf.n5_test))

#BayesFactor
#bf_tda_pca_5.60.5_rf.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_rf.n5_test)) #bf_tda_pca_5.60.5_rf.n5_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_rf.n5_test))

##With TDA KDE filter 5 intervals, 50% overlap, 5 bins 
# NOTE(review): the object names in this section use "5.60.5", which suggests
# 60% overlap rather than 50% -- confirm against the mapper call.
##Node1

# Tune a random forest (caret method "rf") on the node-1 data set, using the
# shared resampling control (`fitControl`) and mtry grid (`rfGrid`) defined
# elsewhere in this file; predictors are centered and scaled first.
#
# `importance = TRUE` is forwarded through `...` to randomForest(). The
# argument name is case-sensitive: the previous spelling `Importance = T` was
# silently ignored, so no permutation importance was computed (the recorded
# summary below shows `importanceSD` as NULL). Console output recorded below
# was produced before this fix.
# `preProcess` is spelled out instead of relying on partial matching.
Adult_TDA_KDE_5.60.5_n1_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n1.vec,
  importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
# Print the tuning summary of the node-1 KDE random-forest fit.
# The grid runs mtry up to 1000 while the model has 108 predictors, which is
# what triggers the "invalid mtry: reset to within valid range" warnings.
Adult_TDA_KDE_5.60.5_n1_RfFit0
## Random Forest 
## 
## 15260 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 10174, 10172, 10174 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     50  0.8604187  0.6249923
##    100  0.8600255  0.6239360
##    150  0.8595669  0.6229970
##    200  0.8583217  0.6197994
##    250  0.8596978  0.6235054
##    300  0.8592393  0.6218488
##    350  0.8596323  0.6241861
##    400  0.8587150  0.6209807
##    450  0.8589115  0.6212022
##    500  0.8586493  0.6210787
##    550  0.8593703  0.6224559
##    600  0.8600908  0.6250479
##    650  0.8595013  0.6228378
##    700  0.8596322  0.6231049
##    750  0.8589117  0.6212448
##    800  0.8584530  0.6196371
##    850  0.8589771  0.6216434
##    900  0.8593047  0.6227621
##    950  0.8594359  0.6227783
##   1000  0.8593047  0.6219482
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold accuracy/kappa of the selected model.
Adult_TDA_KDE_5.60.5_n1_RfFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8598112 0.6186843    Fold1
## 2 0.8554856 0.6179074    Fold3
## 3 0.8659591 0.6383852    Fold2
# Keep only the Accuracy column (as a one-column data frame) for the paired
# fold comparison further down.
# NOTE(review): rows come back in the order Fold1, Fold3, Fold2; the later
# subtraction pairs rows by position, so confirm the baseline table uses the
# same fold order.
ad_tda_kde_5.60.5_n1_rf_fit0_re <-
  Adult_TDA_KDE_5.60.5_n1_RfFit0$resample["Accuracy"]


# Structural summary of the fitted object's components.
summary(Adult_TDA_KDE_5.60.5_n1_RfFit0)
##                 Length Class      Mode     
## call                5  -none-     call     
## type                1  -none-     character
## predicted       15260  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           30520  matrix     numeric  
## oob.times       15260  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               15260  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               1  -none-     list
# Variable-importance plot of the top 25 features. The title previously read
# "TDA-Assited"; the typo is corrected here.
vip(Adult_TDA_KDE_5.60.5_n1_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_KDE_5.60.5_n1_RfFit TDA-Assisted RF")

# Score the test data with the node-1 KDE random-forest fit.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n1_RfFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the observed test labels.
ad_tda_kde_5.60.5_n1_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n1_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7232   559
##      >50K     184  1793
##                                           
##                Accuracy : 0.9239          
##                  95% CI : (0.9185, 0.9291)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.78            
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9752          
##             Specificity : 0.7623          
##          Pos Pred Value : 0.9283          
##          Neg Pred Value : 0.9069          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7404          
##    Detection Prevalence : 0.7976          
##       Balanced Accuracy : 0.8688          
##                                           
##        'Positive' Class :  <=50K          
## 
# Same object printed a second time (kept as in the original run).
ad_tda_kde_5.60.5_n1_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7232   559
##      >50K     184  1793
##                                           
##                Accuracy : 0.9239          
##                  95% CI : (0.9185, 0.9291)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.78            
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9752          
##             Specificity : 0.7623          
##          Pos Pred Value : 0.9283          
##          Neg Pred Value : 0.9069          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7404          
##    Detection Prevalence : 0.7976          
##       Balanced Accuracy : 0.8688          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics; Accuracy is the first entry.
ad_tda_kde_5.60.5_n1_rf_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   9.239353e-01   7.799778e-01   9.184994e-01   9.291167e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   0.000000e+00   7.629413e-43
# Store the (named) test-set accuracy for the comparison further down.
ad_tda_kde_5.60.5_n1_rf_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n1_rf_cf0$overall["Accuracy"]
# Per-class statistics.
ad_tda_kde_5.60.5_n1_rf_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9751888            0.7623299            0.9282505 
##       Neg Pred Value            Precision               Recall 
##            0.9069297            0.9282505            0.9751888 
##                   F1           Prevalence       Detection Rate 
##            0.9511409            0.7592138            0.7403767 
## Detection Prevalence    Balanced Accuracy 
##            0.7976044            0.8687594
# Precision, Recall and F1 are entries 5 to 7 of `byClass`; picked by name.
ad_tda_kde_5.60.5_n1_rf_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n1_rf_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers

### 3-fold diff

# Row-wise difference of the two per-fold accuracy tables (first minus second).
# A negative entry means the second (TDA-assisted) fit scored higher on that row.
# NOTE(review): rows are paired by position -- confirm both resample tables
# list the folds in the same order.
diff_tda_kde_5.60.5_rf_n1_3_fold <-
  ad_rf_fit_re - ad_tda_kde_5.60.5_n1_rf_fit0_re
diff_tda_kde_5.60.5_rf_n1_3_fold
##       Accuracy
## 1  0.003819134
## 2  0.002108457
## 3 -0.015472149
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# (-0.01, 0.01) is the same interval handed to rope() below; presumably the
# lower/upper ROPE bounds -- confirm against the helper's definition.

bst_tda_kde_5.60.5_rf.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_rf_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_rf.n1_3_fold
## $probLeft
## [1] 0.25
## 
## $probRope
## [1] 0.75
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 
# Ratio of probLeft to probRight; probRight is 0 in the recorded run, hence Inf.

bst_tda_kde_5.60.5_rf.n1_3_fold_odds.left <-
  bst_tda_kde_5.60.5_rf.n1_3_fold$probLeft /
  bst_tda_kde_5.60.5_rf.n1_3_fold$probRight
bst_tda_kde_5.60.5_rf.n1_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_kde_5.60.5_rf.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_rf_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_rf.n1_3_fold
## $winLeft
## [1] 0.04716667
## 
## $winRope
## [1] 0.9528333
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# Second argument (0.1) is an extra parameter of the helper; presumably the
# assumed correlation between folds -- TODO confirm.

bct_tda_kde_5.60.5_rf.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_rf_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_rf.n1_3_fold
## $left
## [1] 0.2196185
## 
## $rope
## [1] 0.6777404
## 
## $right
## [1] 0.1026411
# Rope Plot
plot(
  rope(diff_tda_kde_5.60.5_rf_n1_3_fold, c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.60.5_rf.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_rf_n1_3_fold))
#bf_tda_kde_5.60.5_rf.n1_3_fold

#t_test
# Classical one-sample t-test of the fold differences against a mean of 0.
t.test(
  as.matrix(diff_tda_kde_5.60.5_rf_n1_3_fold)
)
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.60.5_rf_n1_3_fold)
## t = -0.51605, df = 2, p-value = 0.6572
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.02970791  0.02334487
## sample estimates:
##   mean of x 
## -0.00318152
### Test set diff
# Difference of the two test-set accuracy values (first minus second): a
# single number, so every test below runs on exactly one observation.
diff_tda_kde_5.60.5_rf.n1_test <-
  rf_cf_ov_acc - ad_tda_kde_5.60.5_n1_rf_cf0_ov_acc
diff_tda_kde_5.60.5_rf.n1_test
##    Accuracy 
## -0.06500819
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_kde_5.60.5_rf.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_rf.n1_test),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_rf.n1_test
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 
# Ratio of probLeft to probRight; probRight is 0 in the recorded run, hence Inf.

bst_tda_kde_5.60.5_rf.n1_test_odds.left <-
  bst_tda_kde_5.60.5_rf.n1_test$probLeft /
  bst_tda_kde_5.60.5_rf.n1_test$probRight
bst_tda_kde_5.60.5_rf.n1_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_kde_5.60.5_rf.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_rf.n1_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_rf.n1_test
## $winLeft
## [1] 0.8402333
## 
## $winRope
## [1] 0.1597667
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# Returns NA for every region in the recorded run; presumably because a
# spread cannot be estimated from a single difference -- TODO confirm.

bct_tda_kde_5.60.5_rf.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_rf.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_rf.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_rf.n1_test))

#BayesFactor
#bf_tda_kde_5.60.5_rf.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_rf.n1_test)) #bf_tda_kde_5.60.5_rf.n1_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_rf.n1_test))

##Node2

# Fit the TDA-assisted random forest on the node-2 training subset with caret,
# tuning mtry over rfGrid under the resampling scheme in fitControl and
# selecting the model by accuracy. Predictors are centred and scaled first.
#
# Fix: randomForest's argument is lower-case `importance`. The previous
# `Importance = T` was absorbed by `...` and ignored, so permutation importance
# was never computed. `TRUE` replaces the reassignable shorthand `T`.
#
# NOTE(review): the recorded run warns "invalid mtry: reset to within valid
# range", so rfGrid presumably contains mtry values above the number of
# predictors -- confirm and trim the grid where it is defined.
Adult_TDA_KDE_5.60.5_n2_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n2.vec,
  method = "rf",
  importance = TRUE,
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProc = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
# Print the fitted caret model: resampling setup plus Accuracy/Kappa per mtry
Adult_TDA_KDE_5.60.5_n2_RfFit0
## Random Forest 
## 
## 14482 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 9654, 9655, 9655 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     50  0.8442202  0.6010694
##    100  0.8422869  0.5986739
##    150  0.8422180  0.5980398
##    200  0.8408369  0.5946913
##    250  0.8403535  0.5931887
##    300  0.8418727  0.5974886
##    350  0.8418726  0.5973079
##    400  0.8429085  0.6003993
##    450  0.8418727  0.5977362
##    500  0.8411132  0.5960861
##    550  0.8424251  0.5984086
##    600  0.8406987  0.5942363
##    650  0.8429084  0.5995989
##    700  0.8417345  0.5967635
##    750  0.8410440  0.5954497
##    800  0.8425633  0.5991881
##    850  0.8412512  0.5961252
##    900  0.8417346  0.5971376
##    950  0.8404226  0.5935948
##   1000  0.8408370  0.5945127
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Show the per-fold Accuracy and Kappa stored in the fit's resample table
Adult_TDA_KDE_5.60.5_n2_RfFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8475559 0.6104136    Fold1
## 2 0.8388233 0.5879605    Fold3
## 3 0.8462813 0.6048341    Fold2
# Keep the per-fold Accuracy column (first column of the resample table) as a
# one-column data frame for the fold-wise comparison below.
# NOTE(review): rows follow the table's fold order (Fold1, Fold3, Fold2 in the
# recorded run); confirm the baseline table is ordered the same way.
ad_tda_KDE_5.60.5_n2_rf_fit0_re <-
  Adult_TDA_KDE_5.60.5_n2_RfFit0[["resample"]]["Accuracy"]


# Summarise the fit; the recorded output lists each component's length, class
# and mode (e.g. importance, ntree, mtry, forest).
summary(Adult_TDA_KDE_5.60.5_n2_RfFit0)
##                 Length Class      Mode     
## call                5  -none-     call     
## type                1  -none-     character
## predicted       14482  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           28964  matrix     numeric  
## oob.times       14482  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               14482  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               1  -none-     list
# Variable-importance plot of the 25 top-ranked predictors for the node-2
# TDA-assisted random forest. The feature count is passed by name, and the
# title typo ("Assited") is corrected.
vip(Adult_TDA_KDE_5.60.5_n2_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_KDE_5.60.5_n2_RfFit TDA-Assisted RF")

# Score the held-out test rows with the node-2 TDA-assisted random forest
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n2_RfFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the test-set predictions against the observed labels and
# print the resulting performance statistics
ad_tda_kde_5.60.5_n2_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n2_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7197   498
##      >50K     219  1854
##                                           
##                Accuracy : 0.9266          
##                  95% CI : (0.9212, 0.9317)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.7908          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9705          
##             Specificity : 0.7883          
##          Pos Pred Value : 0.9353          
##          Neg Pred Value : 0.8944          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7368          
##    Detection Prevalence : 0.7878          
##       Balanced Accuracy : 0.8794          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): prints the same confusion matrix a second time; the object is
# not modified between the two prints, so this output is redundant.
ad_tda_kde_5.60.5_n2_rf_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7197   498
##      >50K     219  1854
##                                           
##                Accuracy : 0.9266          
##                  95% CI : (0.9212, 0.9317)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.7908          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9705          
##             Specificity : 0.7883          
##          Pos Pred Value : 0.9353          
##          Neg Pred Value : 0.8944          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7368          
##    Detection Prevalence : 0.7878          
##       Balanced Accuracy : 0.8794          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics (accuracy, kappa, accuracy bounds, p-values)
ad_tda_kde_5.60.5_n2_rf_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   9.265971e-01   7.907608e-01   9.212457e-01   9.316923e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   0.000000e+00   2.991211e-25
# Store the overall test accuracy (first entry of `overall`), then show the
# per-class statistics
ad_tda_kde_5.60.5_n2_rf_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n2_rf_cf0[["overall"]]["Accuracy"]
ad_tda_kde_5.60.5_n2_rf_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9704693            0.7882653            0.9352827 
##       Neg Pred Value            Precision               Recall 
##            0.8943560            0.9352827            0.9704693 
##                   F1           Prevalence       Detection Rate 
##            0.9525511            0.7592138            0.7367936 
## Detection Prevalence    Balanced Accuracy 
##            0.7877764            0.8793673
# Keep precision, recall and F1 (entries 5 to 7 of `byClass`)
ad_tda_kde_5.60.5_n2_rf_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n2_rf_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers

### 3-fold diff
# Row-wise gap in cross-validated accuracy: ad_rf_fit_re minus the node-2
# TDA-assisted RF (positive values mean ad_rf_fit_re scored higher).
diff_tda_kde_5.60.5_rf_n2_3_fold <-
  ad_rf_fit_re - ad_tda_KDE_5.60.5_n2_rf_fit0_re
diff_tda_kde_5.60.5_rf_n2_3_fold
##      Accuracy
## 1 0.016074457
## 2 0.018770818
## 3 0.004205636
## Bayesian Tests 3-fold diff

# Bayesian Sign Test on the 3-fold accuracy differences
# (-0.01 and 0.01 are presumably the ROPE bounds -- confirm against the helper)
bst_tda_kde_5.60.5_rf.n2_3_fold <- diff_tda_kde_5.60.5_rf_n2_3_fold |>
  as.matrix() |>
  BayesianSignTest(-0.01, 0.01)
bst_tda_kde_5.60.5_rf.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: probLeft / probRight
bst_tda_kde_5.60.5_rf.n2_3_fold_odds.left <-
  bst_tda_kde_5.60.5_rf.n2_3_fold[["probLeft"]] /
  bst_tda_kde_5.60.5_rf.n2_3_fold[["probRight"]]
bst_tda_kde_5.60.5_rf.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test on the 3-fold accuracy differences
# (-0.01 and 0.01 are presumably the ROPE bounds -- confirm against the helper)
bsr_tda_kde_5.60.5_rf.n2_3_fold <- diff_tda_kde_5.60.5_rf_n2_3_fold |>
  as.matrix() |>
  BayesianSignedRank(-0.01, 0.01)
bsr_tda_kde_5.60.5_rf.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.3032333
## 
## $winRight
## [1] 0.6967667
# Bayesian Correlated t-test on the 3-fold accuracy differences
# (0.1 is presumably the correlation term, -0.01/0.01 the ROPE bounds -- confirm)
bct_tda_kde_5.60.5_rf.n2_3_fold <- diff_tda_kde_5.60.5_rf_n2_3_fold |>
  as.matrix() |>
  correlatedBayesianTtest(0.1, -0.01, 0.01)
bct_tda_kde_5.60.5_rf.n2_3_fold
## $left
## [1] 0.02343112
## 
## $rope
## [1] 0.2857245
## 
## $right
## [1] 0.6908443
# ROPE plot: share of the 3-fold accuracy differences inside [-0.01, 0.01]
diff_tda_kde_5.60.5_rf_n2_3_fold |>
  rope(range = c(-0.01, 0.01)) |>
  plot()

#BayesFactor
#bf_tda_kde_5.60.5_rf.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_rf_n2_3_fold))
#bf_tda_kde_5.60.5_rf.n2_3_fold

# One-sample t-test of the 3-fold accuracy differences against a mean of 0
diff_tda_kde_5.60.5_rf_n2_3_fold |>
  as.matrix() |>
  t.test()
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.60.5_rf_n2_3_fold)
## t = 2.9095, df = 2, p-value = 0.1006
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.006232661  0.032266601
## sample estimates:
##  mean of x 
## 0.01301697
### Test set diff
# Held-out accuracy gap: rf_cf_ov_acc minus the node-2 TDA-assisted RF accuracy
# (a negative value means the TDA-assisted model scored higher).
diff_tda_kde_5.60.5_rf.n2_test <-
  rf_cf_ov_acc - ad_tda_kde_5.60.5_n2_rf_cf0_ov_acc
diff_tda_kde_5.60.5_rf.n2_test
##    Accuracy 
## -0.06766994
## Bayesian Tests Test set diff

# Bayesian Sign Test on the test-set accuracy difference
# (-0.01 and 0.01 are presumably the ROPE bounds -- confirm against the helper)
bst_tda_kde_5.60.5_rf.n2_test <- diff_tda_kde_5.60.5_rf.n2_test |>
  as.matrix() |>
  BayesianSignTest(-0.01, 0.01)
bst_tda_kde_5.60.5_rf.n2_test
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test: probLeft / probRight
# (evaluates to Inf when probRight is 0)
bst_tda_kde_5.60.5_rf.n2_test_odds.left <-
  bst_tda_kde_5.60.5_rf.n2_test[["probLeft"]] /
  bst_tda_kde_5.60.5_rf.n2_test[["probRight"]]
bst_tda_kde_5.60.5_rf.n2_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test on the test-set accuracy difference
# (-0.01 and 0.01 are presumably the ROPE bounds -- confirm against the helper)
bsr_tda_kde_5.60.5_rf.n2_test <- diff_tda_kde_5.60.5_rf.n2_test |>
  as.matrix() |>
  BayesianSignedRank(-0.01, 0.01)
bsr_tda_kde_5.60.5_rf.n2_test
## $winLeft
## [1] 0.8399
## 
## $winRope
## [1] 0.1601
## 
## $winRight
## [1] 0
# Bayesian Correlated t-test on the test-set accuracy difference
# (0.1 is presumably the correlation term, -0.01/0.01 the ROPE bounds -- confirm)
# NOTE(review): the recorded result is NA for left/rope/right; the input is a
# single difference, which presumably leaves its variance undefined -- confirm.
bct_tda_kde_5.60.5_rf.n2_test <- diff_tda_kde_5.60.5_rf.n2_test |>
  as.matrix() |>
  correlatedBayesianTtest(0.1, -0.01, 0.01)
bct_tda_kde_5.60.5_rf.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_rf.n2_test))

#BayesFactor
#bf_tda_kde_5.60.5_rf.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_rf.n2_test))
#bf_tda_kde_5.60.5_rf.n2_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_rf.n2_test))

## Node3

# Tune and fit a random forest on the node-3 data with caret. The resampling
# scheme (fitControl) and the mtry grid (rfGrid) are defined elsewhere.
# NOTE(review): the recorded run warned "invalid mtry: reset to within valid
# range" while reporting 108 predictors, so grid values above the predictor
# count appear to be clamped -- consider capping rfGrid.
Adult_TDA_KDE_5.60.5_n3_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n3.vec,
  method = "rf",
  # randomForest's argument is lower-case `importance`; the former
  # `Importance = T` was swallowed by `...` and never reached the forest.
  importance = TRUE,
  trControl = fitControl,
  tuneGrid = rfGrid,
  # Spelled out in full instead of relying on partial matching of `preProc`.
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
# Print the fitted node-3 caret object (resampling results per mtry value)
Adult_TDA_KDE_5.60.5_n3_RfFit0
## Random Forest 
## 
## 13266 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8844, 8844, 8844 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     50  0.8365747  0.5678172
##    100  0.8328057  0.5585570
##    150  0.8352932  0.5650890
##    200  0.8343133  0.5624650
##    250  0.8316750  0.5552553
##    300  0.8328810  0.5588771
##    350  0.8331826  0.5590260
##    400  0.8343133  0.5625154
##    450  0.8317503  0.5555991
##    500  0.8322026  0.5566543
##    550  0.8344640  0.5627715
##    600  0.8323534  0.5574370
##    650  0.8319765  0.5556011
##    700  0.8337102  0.5614182
##    750  0.8326549  0.5583142
##    800  0.8340871  0.5619008
##    850  0.8339364  0.5615830
##    900  0.8331072  0.5591186
##    950  0.8340118  0.5623831
##   1000  0.8328057  0.5586455
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold Accuracy and Kappa stored on the fitted node-3 object
Adult_TDA_KDE_5.60.5_n3_RfFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8353686 0.5651862    Fold1
## 2 0.8371777 0.5704360    Fold3
## 3 0.8371777 0.5678294    Fold2
# Keep the per-fold Accuracy column (the first column of $resample) as a
# one-column data frame, in the row order caret returned it (Fold1, Fold3,
# Fold2 in the recorded run).
# NOTE(review): the later subtraction against another fit's resamples pairs
# rows by position, not by fold label -- confirm the fold order matches.
ad_tda_kde_5.60.5_n3_rf_fit0_re <-
  Adult_TDA_KDE_5.60.5_n3_RfFit0$resample["Accuracy"]


# List the components (length, class, mode) of the fitted node-3 model
summary(Adult_TDA_KDE_5.60.5_n3_RfFit0)
##                 Length Class      Mode     
## call                5  -none-     call     
## type                1  -none-     character
## predicted       13266  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           26532  matrix     numeric  
## oob.times       13266  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               13266  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               1  -none-     list
# Variable-importance plot of the 25 top-ranked predictors for the node-3 fit.
# The title previously read "TDA-Assited"; the spelling is corrected here.
vip(Adult_TDA_KDE_5.60.5_n3_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_KDE_5.60.5_n3_RfFit TDA-Assisted RF")

# Score the rows of adult.one_hot_df4Test with the fitted node-3 model
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n3_RfFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the node-3 predictions against the observed test labels
# to assess performance on the test data
ad_tda_kde_5.60.5_n3_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n3_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7141   539
##      >50K     275  1813
##                                          
##                Accuracy : 0.9167         
##                  95% CI : (0.911, 0.9221)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.763          
##                                          
##  Mcnemar's Test P-Value : < 2.2e-16      
##                                          
##             Sensitivity : 0.9629         
##             Specificity : 0.7708         
##          Pos Pred Value : 0.9298         
##          Neg Pred Value : 0.8683         
##              Prevalence : 0.7592         
##          Detection Rate : 0.7311         
##    Detection Prevalence : 0.7862         
##       Balanced Accuracy : 0.8669         
##                                          
##        'Positive' Class :  <=50K         
## 
# Second print of the same node-3 confusion matrix object
ad_tda_kde_5.60.5_n3_rf_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7141   539
##      >50K     275  1813
##                                          
##                Accuracy : 0.9167         
##                  95% CI : (0.911, 0.9221)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.763          
##                                          
##  Mcnemar's Test P-Value : < 2.2e-16      
##                                          
##             Sensitivity : 0.9629         
##             Specificity : 0.7708         
##          Pos Pred Value : 0.9298         
##          Neg Pred Value : 0.8683         
##              Prevalence : 0.7592         
##          Detection Rate : 0.7311         
##    Detection Prevalence : 0.7862         
##       Balanced Accuracy : 0.8669         
##                                          
##        'Positive' Class :  <=50K         
## 
# Overall statistics of the node-3 confusion matrix (Accuracy, Kappa, ...)
ad_tda_kde_5.60.5_n3_rf_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   9.166667e-01   7.629915e-01   9.110094e-01   9.220740e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   0.000000e+00   3.022836e-20
# Store the overall test-set Accuracy (first element of $overall) as a named
# length-one vector, then show the per-class statistics.
ad_tda_kde_5.60.5_n3_rf_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n3_rf_cf0$overall["Accuracy"]
ad_tda_kde_5.60.5_n3_rf_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9629180            0.7708333            0.9298177 
##       Neg Pred Value            Precision               Recall 
##            0.8682950            0.9298177            0.9629180 
##                   F1           Prevalence       Detection Rate 
##            0.9460784            0.7592138            0.7310606 
## Detection Prevalence    Balanced Accuracy 
##            0.7862408            0.8668757
# Precision, Recall and F1 are elements 5 to 7 of $byClass; select by name.
ad_tda_kde_5.60.5_n3_rf_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n3_rf_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers

### 3-fold diff

# Row-by-row difference of the per-fold accuracies: ad_rf_fit_re (presumably
# the non-TDA RF resamples -- confirm) minus the node-3 TDA-assisted resamples.
diff_tda_kde_5.60.5_rf_n3_3_fold <-
  ad_rf_fit_re - ad_tda_kde_5.60.5_n3_rf_fit0_re
diff_tda_kde_5.60.5_rf_n3_3_fold
##     Accuracy
## 1 0.02826177
## 2 0.02041636
## 3 0.01330922
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

# Run on the three node-3 per-fold differences with bounds -0.01 and 0.01.
bst_tda_kde_5.60.5_rf.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_rf_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_rf.n3_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test

# Ratio probLeft / probRight; Inf if probRight is 0, NaN if both are 0.
bst_tda_kde_5.60.5_rf.n3_3_fold_odds.left <- with(
  bst_tda_kde_5.60.5_rf.n3_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.60.5_rf.n3_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

# Run on the three node-3 per-fold differences with bounds -0.01 and 0.01.
bsr_tda_kde_5.60.5_rf.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_rf_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_rf.n3_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.03723333
## 
## $winRight
## [1] 0.9627667
# Bayesian Correlated Test

# Numeric arguments are positional; presumably the correlation (0.1) and the
# ROPE bounds (-0.01, 0.01) -- confirm against correlatedBayesianTtest().
bct_tda_kde_5.60.5_rf.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_rf_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_rf.n3_3_fold
## $left
## [1] 0.01271962
## 
## $rope
## [1] 0.07023381
## 
## $right
## [1] 0.9170466
# Rope Plot: node-3 3-fold differences against the interval [-0.01, 0.01]
plot(
  rope(diff_tda_kde_5.60.5_rf_n3_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.60.5_rf.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_rf_n3_3_fold))
#bf_tda_kde_5.60.5_rf.n3_3_fold

# t_test: one-sample t-test of the node-3 3-fold differences (null mean of 0)
t.test(x = as.matrix(diff_tda_kde_5.60.5_rf_n3_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.60.5_rf_n3_3_fold)
## t = 4.785, df = 2, p-value = 0.04101
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.002082813 0.039242086
## sample estimates:
##  mean of x 
## 0.02066245
### Test set diff
# Single test-set accuracy difference for node 3: rf_cf_ov_acc (presumably the
# non-TDA RF test accuracy -- confirm) minus the node-3 TDA-assisted accuracy.
diff_tda_kde_5.60.5_rf.n3_test <-
  rf_cf_ov_acc - ad_tda_kde_5.60.5_n3_rf_cf0_ov_acc
diff_tda_kde_5.60.5_rf.n3_test
##    Accuracy 
## -0.05773956
## Bayesian Tests Test set diff

# Bayesian Sign Test

# Run on the single node-3 test-set difference with bounds -0.01 and 0.01.
bst_tda_kde_5.60.5_rf.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_rf.n3_test),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_rf.n3_test
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test

# Ratio probLeft / probRight. It is Inf when probRight is 0 (as in the
# recorded run) and would be NaN if both probabilities were 0.
bst_tda_kde_5.60.5_rf.n3_test_odds.left <- with(
  bst_tda_kde_5.60.5_rf.n3_test,
  probLeft / probRight
)
bst_tda_kde_5.60.5_rf.n3_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test

# Run on the single node-3 test-set difference with bounds -0.01 and 0.01.
bsr_tda_kde_5.60.5_rf.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_rf.n3_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_rf.n3_test
## $winLeft
## [1] 0.8409667
## 
## $winRope
## [1] 0.1590333
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

# NOTE(review): the input is a single test-set difference, and the recorded
# run returned NA for left/rope/right, so this result carries no information.
bct_tda_kde_5.60.5_rf.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_rf.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_rf.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_rf.n3_test))

#BayesFactor
#bf_tda_kde_5.60.5_rf.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_rf.n3_test)) #bf_tda_kde_5.60.5_rf.n3_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_rf.n3_test))

##Node4

# Fit a random forest (caret method "rf") on the Node4 data set, predicting
# as.factor(adult_df1) from all other columns. mtry is tuned over rfGrid with
# the resampling scheme in fitControl; predictors are centered and scaled, and
# the best model is selected by Accuracy.
#
# NOTE(review): `Importance` (capital I) is forwarded through `...`, whereas
# randomForest()'s own argument is lowercase `importance`. The recorded
# summary() of this fit shows an empty importanceSD, so the flag appears to
# have had no effect. Confirm the intent before renaming it: doing so changes
# the fitted model and the importance plot built from it.
#
# NOTE(review): the recorded run reports 108 predictors while the printed
# tuning grid runs from 50 to 1000, which matches the repeated
# "invalid mtry: reset to within valid range" warnings -- verify rfGrid where
# it is defined.
Adult_TDA_KDE_5.60.5_n4_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n4.vec,
  Importance = TRUE,
  method = 'rf',
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c('center', 'scale'),
  metric = 'Accuracy'
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
# Show the resampling results for every mtry value and the selected model.
print(Adult_TDA_KDE_5.60.5_n4_RfFit0)
## Random Forest 
## 
## 11795 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 7863, 7864, 7863 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     50  0.8538363  0.5382207
##    100  0.8498513  0.5275065
##    150  0.8506991  0.5298670
##    200  0.8503600  0.5301043
##    250  0.8502752  0.5293552
##    300  0.8506992  0.5321344
##    350  0.8512078  0.5330523
##    400  0.8508689  0.5316184
##    450  0.8512079  0.5328793
##    500  0.8516318  0.5342367
##    550  0.8506991  0.5307403
##    600  0.8511231  0.5331564
##    650  0.8510383  0.5319712
##    700  0.8507840  0.5314409
##    750  0.8495970  0.5290543
##    800  0.8486644  0.5246464
##    850  0.8504449  0.5312280
##    900  0.8501906  0.5297986
##    950  0.8495969  0.5278199
##   1000  0.8508686  0.5317453
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold Accuracy and Kappa of the selected model.
print(Adult_TDA_KDE_5.60.5_n4_RfFit0[["resample"]])
##    Accuracy     Kappa Resample
## 1 0.8611394 0.5534385    Fold1
## 2 0.8476602 0.5170971    Fold3
## 3 0.8527092 0.5441265    Fold2
# Keep only the first column of the per-fold table (Accuracy in the printout
# above) as a one-column data frame for the fold-wise comparison.
ad_tda_kde_5.60.5_n4_rf_fit0_re <- Adult_TDA_KDE_5.60.5_n4_RfFit0[["resample"]][1]

# Component overview of the fitted object.
print(summary(Adult_TDA_KDE_5.60.5_n4_RfFit0))
##                 Length Class      Mode     
## call                5  -none-     call     
## type                1  -none-     character
## predicted       11795  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           23590  matrix     numeric  
## oob.times       11795  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               11795  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               1  -none-     list
# Variable-importance plot for the Node4 fit (25 passed as the second
# argument), with a descriptive title. Title typo "Assited" corrected.
vip(Adult_TDA_KDE_5.60.5_n4_RfFit0, 25) +
  ggtitle("Adult_TDA_KDE_5.60.5_n4_RfFit TDA-Assisted RF")

# Score the test data with the Node4 model.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n4_RfFit0,
  newdata = adult.one_hot_df4Test
)

# Compare the predictions with the observed labels in the test data.
ad_tda_kde_5.60.5_n4_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
print(ad_tda_kde_5.60.5_n4_rf_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6799   597
##      >50K     617  1755
##                                          
##                Accuracy : 0.8757         
##                  95% CI : (0.869, 0.8822)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : <2e-16         
##                                          
##                   Kappa : 0.6611         
##                                          
##  Mcnemar's Test P-Value : 0.5855         
##                                          
##             Sensitivity : 0.9168         
##             Specificity : 0.7462         
##          Pos Pred Value : 0.9193         
##          Neg Pred Value : 0.7399         
##              Prevalence : 0.7592         
##          Detection Rate : 0.6960         
##    Detection Prevalence : 0.7572         
##       Balanced Accuracy : 0.8315         
##                                          
##        'Positive' Class :  <=50K         
## 
# Second print of the same confusion matrix (repeats the output shown above).
print(ad_tda_kde_5.60.5_n4_rf_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6799   597
##      >50K     617  1755
##                                          
##                Accuracy : 0.8757         
##                  95% CI : (0.869, 0.8822)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : <2e-16         
##                                          
##                   Kappa : 0.6611         
##                                          
##  Mcnemar's Test P-Value : 0.5855         
##                                          
##             Sensitivity : 0.9168         
##             Specificity : 0.7462         
##          Pos Pred Value : 0.9193         
##          Neg Pred Value : 0.7399         
##              Prevalence : 0.7592         
##          Detection Rate : 0.6960         
##    Detection Prevalence : 0.7572         
##       Balanced Accuracy : 0.8315         
##                                          
##        'Positive' Class :  <=50K         
## 
# Overall statistics of the confusion matrix as a named numeric vector.
print(ad_tda_kde_5.60.5_n4_rf_cf0[["overall"]])
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.757166e-01   6.610558e-01   8.690109e-01   8.821974e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  4.835282e-184   5.855396e-01
# First entry of the overall statistics (named Accuracy in the printout above).
ad_tda_kde_5.60.5_n4_rf_cf0_ov_acc <- ad_tda_kde_5.60.5_n4_rf_cf0[["overall"]][1]

# Per-class statistics of the confusion matrix.
print(ad_tda_kde_5.60.5_n4_rf_cf0[["byClass"]])
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9168015            0.7461735            0.9192807 
##       Neg Pred Value            Precision               Recall 
##            0.7398820            0.9192807            0.9168015 
##                   F1           Prevalence       Detection Rate 
##            0.9180394            0.7592138            0.6960483 
## Detection Prevalence    Balanced Accuracy 
##            0.7571663            0.8314875
# Entries 5 to 7 of the per-class statistics (Precision, Recall and F1 in the
# printout above).
ad_tda_kde_5.60.5_n4_rf_cf0_pre_rec_f1 <- ad_tda_kde_5.60.5_n4_rf_cf0[["byClass"]][5:7]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers

### 3-fold diff

# Per-fold accuracy difference: ad_rf_fit_re minus the Node4 per-fold accuracy.
# NOTE(review): the Node4 resample rows print in the order Fold1, Fold3, Fold2
# and this subtraction pairs rows by position -- confirm ad_rf_fit_re is stored
# in the same fold order.
diff_tda_kde_5.60.5_rf_n4_3_fold <- ad_rf_fit_re - ad_tda_kde_5.60.5_n4_rf_fit0_re
print(diff_tda_kde_5.60.5_rf_n4_3_fold)
##       Accuracy
## 1  0.002491011
## 2  0.009933880
## 3 -0.002222264
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# Run on the three per-fold differences; -0.01 and 0.01 are passed positionally
# (presumably the ROPE bounds -- confirm against the function definition).
bst_tda_kde_5.60.5_rf.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_rf_n4_3_fold),
  -0.01,
  0.01
)
print(bst_tda_kde_5.60.5_rf.n4_3_fold)
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of the "left" to the "right" probability from the sign test. Both
# recorded probabilities are 0 here, so the printed ratio is NaN (0 / 0).
bst_tda_kde_5.60.5_rf.n4_3_fold_odds.left <-
  bst_tda_kde_5.60.5_rf.n4_3_fold[["probLeft"]] /
  bst_tda_kde_5.60.5_rf.n4_3_fold[["probRight"]]
print(bst_tda_kde_5.60.5_rf.n4_3_fold_odds.left)
## [1] NaN
# Bayesian Signed Rank Test
# Same three per-fold differences and the same -0.01 / 0.01 positional
# arguments as the sign test.
bsr_tda_kde_5.60.5_rf.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_rf_n4_3_fold),
  -0.01,
  0.01
)
print(bsr_tda_kde_5.60.5_rf.n4_3_fold)
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# Correlated Bayesian t-test on the three per-fold differences. The trailing
# arguments 0.1, -0.01 and 0.01 are passed positionally (presumably the
# correlation and the ROPE bounds -- confirm against the function definition).
bct_tda_kde_5.60.5_rf.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_rf_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_kde_5.60.5_rf.n4_3_fold)
## $left
## [1] 0.04086506
## 
## $rope
## [1] 0.8353051
## 
## $right
## [1] 0.1238298
# Rope Plot
plot(rope(diff_tda_kde_5.60.5_rf_n4_3_fold, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_kde_5.60.5_rf.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_rf_n4_3_fold))
#bf_tda_kde_5.60.5_rf.n4_3_fold

#t_test
# One-sample t-test on the 3-fold differences. It is kept with the other
# 3-fold analyses, ahead of the test-set section, as in the Node3 analysis.
t.test(as.matrix(diff_tda_kde_5.60.5_rf_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.60.5_rf_n4_3_fold)
## t = 0.9611, df = 2, p-value = 0.4379
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.01182424  0.01862600
## sample estimates:
##   mean of x 
## 0.003400876
### Test set diff
# Single accuracy difference: rf_cf_ov_acc minus the Node4 model's overall
# accuracy.
diff_tda_kde_5.60.5_rf.n4_test <- (rf_cf_ov_acc - ad_tda_kde_5.60.5_n4_rf_cf0_ov_acc)
diff_tda_kde_5.60.5_rf.n4_test
##    Accuracy 
## -0.01678952
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_kde_5.60.5_rf.n4_test <- BayesianSignTest(as.matrix(diff_tda_kde_5.60.5_rf.n4_test), -0.01, 0.01)
bst_tda_kde_5.60.5_rf.n4_test
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 
# Ratio of the "left" to the "right" probability from the test-set sign test.
# The recorded probRight is 0, which is why the printed ratio is Inf.
bst_tda_kde_5.60.5_rf.n4_test_odds.left <- bst_tda_kde_5.60.5_rf.n4_test$probLeft / bst_tda_kde_5.60.5_rf.n4_test$probRight
bst_tda_kde_5.60.5_rf.n4_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Run on the single test-set difference, with -0.01 and 0.01 passed
# positionally (presumably the ROPE bounds -- confirm against the definition).
bsr_tda_kde_5.60.5_rf.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_rf.n4_test),
  -0.01,
  0.01
)
print(bsr_tda_kde_5.60.5_rf.n4_test)
## $winLeft
## [1] 0.5425
## 
## $winRope
## [1] 0.4575
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# NOTE(review): the input here is a single value and the recorded result is NA
# for left, rope and right -- presumably the test needs more than one
# difference; confirm whether this call should be kept.
bct_tda_kde_5.60.5_rf.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_rf.n4_test),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_kde_5.60.5_rf.n4_test)
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_rf.n4_test))

#BayesFactor
#bf_tda_kde_5.60.5_rf.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_rf.n4_test)) #bf_tda_kde_5.60.5_rf.n4_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_rf.n4_test))

## Node5

# Tune a random forest (caret method "rf") on the node-5 TDA/KDE data,
# predicting adult_df1 (as a factor) from all remaining columns. Predictors
# are centered and scaled; the mtry value with the best resampled Accuracy
# is selected using the shared fitControl / rfGrid settings.
#
# NOTE(review): the run emits "invalid mtry: reset to within valid range"
# warnings, so rfGrid appears to contain mtry values above the number of
# predictors -- consider trimming the grid where rfGrid is defined.
Adult_TDA_KDE_5.60.5_n5_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n5.vec,
  # randomForest's argument is lower-case `importance`. The previous
  # `Importance = T` was forwarded through `...` and silently ignored, so
  # importance measures were never requested.
  importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  # Spelled out in full rather than relying on partial matching of `preProc`.
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
# Print the fitted object: sample/predictor counts, pre-processing,
# resampling scheme, and Accuracy/Kappa for each mtry value tried.
Adult_TDA_KDE_5.60.5_n5_RfFit0
## Random Forest 
## 
## 8940 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 5960, 5960, 5960 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     50  0.8734899  0.4335920
##    100  0.8694631  0.4247921
##    150  0.8682327  0.4222203
##    200  0.8690157  0.4261507
##    250  0.8674497  0.4170683
##    300  0.8684564  0.4220421
##    350  0.8700224  0.4300513
##    400  0.8681208  0.4196209
##    450  0.8680089  0.4192217
##    500  0.8680089  0.4199632
##    550  0.8681208  0.4226921
##    600  0.8685682  0.4242608
##    650  0.8680089  0.4187985
##    700  0.8685682  0.4239588
##    750  0.8673378  0.4198456
##    800  0.8674497  0.4181890
##    850  0.8680089  0.4232862
##    900  0.8682327  0.4218440
##    950  0.8671141  0.4182009
##   1000  0.8683445  0.4216936
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Show the per-fold Accuracy/Kappa table stored in the fit's `resample`
# element (one row per cross-validation fold).
Adult_TDA_KDE_5.60.5_n5_RfFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8684564 0.4186523    Fold1
## 2 0.8704698 0.4228168    Fold3
## 3 0.8815436 0.4593069    Fold2
# Keep only the per-fold Accuracy column (first column of `resample`) as a
# one-column data frame, for the fold-wise comparison further down.
ad_tda_kde_5.60.5_n5_rf_fit0_re <-
  Adult_TDA_KDE_5.60.5_n5_RfFit0$resample["Accuracy"]

# Component-by-component overview of the fitted object.
summary(Adult_TDA_KDE_5.60.5_n5_RfFit0)
##                 Length Class      Mode     
## call                5  -none-     call     
## type                1  -none-     character
## predicted        8940  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           17880  matrix     numeric  
## oob.times        8940  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y                8940  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               1  -none-     list
# Variable-importance plot for the node-5 fit, limited to 25 features.
# The title previously read "TDA-Assited"; the typo is corrected here.
vip(Adult_TDA_KDE_5.60.5_n5_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_KDE_5.60.5_n5_RfFit TDA-Assisted RF")

# Score the test data with the node-5 random forest fit.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n5_RfFit0,
  newdata = adult.one_hot_df4Test
)

# Compare the predictions with the observed adult_df1 labels of the test
# data, then print the resulting confusion matrix and statistics.
ad_tda_kde_5.60.5_n5_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n5_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6998   860
##      >50K     418  1492
##                                           
##                Accuracy : 0.8692          
##                  95% CI : (0.8623, 0.8758)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.6176          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9436          
##             Specificity : 0.6344          
##          Pos Pred Value : 0.8906          
##          Neg Pred Value : 0.7812          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7164          
##    Detection Prevalence : 0.8045          
##       Balanced Accuracy : 0.7890          
##                                           
##        'Positive' Class :  <=50K          
## 
# Second print of the same confusion matrix object; the output repeats the
# one shown directly above.
ad_tda_kde_5.60.5_n5_rf_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6998   860
##      >50K     418  1492
##                                           
##                Accuracy : 0.8692          
##                  95% CI : (0.8623, 0.8758)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.6176          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9436          
##             Specificity : 0.6344          
##          Pos Pred Value : 0.8906          
##          Neg Pred Value : 0.7812          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7164          
##    Detection Prevalence : 0.8045          
##       Balanced Accuracy : 0.7890          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the confusion matrix as a named numeric vector
# (Accuracy is its first element).
ad_tda_kde_5.60.5_n5_rf_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.691646e-01   6.176167e-01   8.623158e-01   8.757924e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  1.668548e-162   5.798927e-35
# Store the test-set Accuracy (first entry of `overall`) on its own.
ad_tda_kde_5.60.5_n5_rf_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n5_rf_cf0$overall["Accuracy"]

# Per-class statistics (Sensitivity, Specificity, Precision, Recall, F1, ...).
ad_tda_kde_5.60.5_n5_rf_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9436354            0.6343537            0.8905574 
##       Neg Pred Value            Precision               Recall 
##            0.7811518            0.8905574            0.9436354 
##                   F1           Prevalence       Detection Rate 
##            0.9163284            0.7592138            0.7164210 
## Detection Prevalence    Balanced Accuracy 
##            0.8044636            0.7889946
# Keep Precision, Recall and F1 (entries 5 to 7 of `byClass`).
ad_tda_kde_5.60.5_n5_rf_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n5_rf_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers

### 3-fold diff

# Fold-wise accuracy difference: ad_rf_fit_re minus the node-5 TDA-assisted
# accuracies. The subtraction pairs rows by position.
# NOTE(review): the node-5 `resample` table printed its rows as Fold1, Fold3,
# Fold2 -- confirm ad_rf_fit_re is in the same fold order.
diff_tda_kde_5.60.5_rf_n5_3_fold <-
  ad_rf_fit_re - ad_tda_kde_5.60.5_n5_rf_fit0_re
diff_tda_kde_5.60.5_rf_n5_3_fold
##       Accuracy
## 1 -0.004825995
## 2 -0.012875695
## 3 -0.031056654
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# Run on the node-5 3-fold accuracy differences; -0.01 / 0.01 are passed
# positionally (presumably the ROPE bounds -- TODO confirm).
bst_tda_kde_5.60.5_rf.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_rf_n5_3_fold),
  -0.01, 0.01
)
bst_tda_kde_5.60.5_rf.n5_3_fold
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of the sign test's probLeft to probRight for the 3-fold differences.
# probRight printed as 0 above, so the ratio evaluates to Inf.
bst_tda_kde_5.60.5_rf.n5_3_fold_odds.left <-
  bst_tda_kde_5.60.5_rf.n5_3_fold[["probLeft"]] /
  bst_tda_kde_5.60.5_rf.n5_3_fold[["probRight"]]
bst_tda_kde_5.60.5_rf.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Same node-5 3-fold differences and the same -0.01 / 0.01 positional
# arguments as the sign test (presumably the ROPE bounds -- TODO confirm).
bsr_tda_kde_5.60.5_rf.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_rf_n5_3_fold),
  -0.01, 0.01
)
bsr_tda_kde_5.60.5_rf.n5_3_fold
## $winLeft
## [1] 0.6046667
## 
## $winRope
## [1] 0.3953333
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# Applied to the node-5 3-fold accuracy differences. The numeric arguments
# are passed positionally; presumably 0.1 is the correlation and
# -0.01 / 0.01 the ROPE bounds -- TODO confirm against the helper definition.
bct_tda_kde_5.60.5_rf.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_rf_n5_3_fold),
  0.1, -0.01, 0.01
)
bct_tda_kde_5.60.5_rf.n5_3_fold
## $left
## [1] 0.7212907
## 
## $rope
## [1] 0.2290173
## 
## $right
## [1] 0.04969198
# Rope Plot
# Plot the rope() summary of the node-5 3-fold differences, using
# [-0.01, 0.01] as the range.
plot(
  rope(diff_tda_kde_5.60.5_rf_n5_3_fold, range = c(-0.01, 0.01))
)

# BayesFactor (disabled)
#bf_tda_kde_5.60.5_rf.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_rf_n5_3_fold))
#bf_tda_kde_5.60.5_rf.n5_3_fold

# t_test
# One-sample t-test of the node-5 3-fold accuracy differences.
t.test(x = as.matrix(diff_tda_kde_5.60.5_rf_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.60.5_rf_n5_3_fold)
## t = -2.0949, df = 2, p-value = 0.1712
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.04963329  0.01712773
## sample estimates:
##   mean of x 
## -0.01625278
### Test set diff
# Test-set accuracy difference: `rf_cf_ov_acc` minus the TDA-assisted model's
# accuracy. This is a single named value, not one value per fold.
diff_tda_kde_5.60.5_rf.n5_test <-
  rf_cf_ov_acc - ad_tda_kde_5.60.5_n5_rf_cf0_ov_acc
diff_tda_kde_5.60.5_rf.n5_test
##    Accuracy 
## -0.01023751
## Bayesian Tests Test set diff

# Bayesian Sign Test
# NOTE(review): the input here is one observation only; interpret with care.

bst_tda_kde_5.60.5_rf.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_rf.n5_test),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_rf.n5_test
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of probLeft to probRight. A zero probRight (as printed above) makes
# the ratio Inf, or NaN when probLeft is zero as well.

bst_tda_kde_5.60.5_rf.n5_test_odds.left <-
  bst_tda_kde_5.60.5_rf.n5_test[["probLeft"]] /
  bst_tda_kde_5.60.5_rf.n5_test[["probRight"]]
bst_tda_kde_5.60.5_rf.n5_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test

# Signed-rank test on the single test-set difference, with the same
# -0.01 / 0.01 bounds as the sign test.
bsr_tda_kde_5.60.5_rf.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_rf.n5_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_rf.n5_test
## $winLeft
## [1] 0.5425333
## 
## $winRope
## [1] 0.4574667
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# NOTE(review): with a single difference this call returns NA for left, rope
# and right in the recorded run -- presumably because no spread can be
# estimated from one value. Consider dropping it for the test-set comparison.

bct_tda_kde_5.60.5_rf.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_rf.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_rf.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_rf.n5_test))

#BayesFactor
#bf_tda_kde_5.60.5_rf.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_rf.n5_test)) #bf_tda_kde_5.60.5_rf.n5_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_rf.n5_test))

##Non-TDA-Assisted

# Tuning grid for the radial-basis SVM: three sigma values crossed with five
# cost values (C = 0.25, 0.50, 0.75, 1.00, 1.25), i.e. 15 candidates.
svmGrid <- expand.grid(sigma = c(0.1, 1, 10), C = (1:5) * 0.25)

# Support Vector Machine - Radial Basis, trained on the non-TDA training set
# with centring and scaling of the predictors.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, and `preProcess` is written in full instead of relying on
# partial argument matching.
# NOTE(review): `Importance` is presumably forwarded through `...` to the
# underlying SVM fitter; confirm that the svmRadial backend actually uses it.
adultSvmFit <- train(
  as.factor(adult_df1) ~ .,
  data = adult.one_hot_df4Train,
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the tuning summary: resampled Accuracy and Kappa for every sigma / C
# pair in the grid, and the combination selected for the final model.
adultSvmFit
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 22793 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 15196, 15195, 15195 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa        
##    0.1   0.25  0.8071777   0.3474412847
##    0.1   0.50  0.8161718   0.4016471422
##    0.1   0.75  0.8205591   0.4288367993
##    0.1   1.00  0.8231914   0.4462417109
##    0.1   1.25  0.8233669   0.4527020877
##    1.0   0.25  0.7780021   0.1405706744
##    1.0   0.50  0.7882245   0.2200226573
##    1.0   0.75  0.7926118   0.2614948044
##    1.0   1.00  0.7961655   0.2925500442
##    1.0   1.25  0.7971746   0.3092072051
##   10.0   0.25  0.7590050  -0.0003505724
##   10.0   0.50  0.7604528   0.0157859748
##   10.0   0.75  0.7632606   0.0455007542
##   10.0   1.00  0.7655859   0.0761557182
##   10.0   1.25  0.7660685   0.0942110410
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.25.
# Per-fold resampling results of the selected model. The rows come back as
# Fold1, Fold3, Fold2 in the output below, not in numeric order.
adultSvmFit$resample
##    Accuracy     Kappa Resample
## 1 0.8242727 0.4594833    Fold1
## 2 0.8223217 0.4519671    Fold3
## 3 0.8235062 0.4466559    Fold2
# Keep the accuracy column as a one-column data frame. It is selected by name
# so the extraction does not depend on column order.
ad_svm_fit_re <- adultSvmFit$resample["Accuracy"]

summary(adultSvmFit)
## Length  Class   Mode 
##      1   ksvm     S4
#vip(adultSvmFit, 25) + ggtitle("non-TDA-Assisted Svm")

# Predict outcome using model from training data based on testing data
predictions <- predict(
  adultSvmFit,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predicted classes against the observed test-set labels.
svm_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test[["adult_df1"]])
)
svm_cf
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6941  1215
##      >50K     475  1137
##                                           
##                Accuracy : 0.827           
##                  95% CI : (0.8193, 0.8344)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.4698          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9359          
##             Specificity : 0.4834          
##          Pos Pred Value : 0.8510          
##          Neg Pred Value : 0.7053          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7106          
##    Detection Prevalence : 0.8350          
##       Balanced Accuracy : 0.7097          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the baseline SVM on the test set.
svm_cf$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.269861e-01   4.698380e-01   8.193364e-01   8.344402e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.336799e-59   2.986040e-72
# Accuracy is selected by name instead of by position.
svm_cf_ov_acc <- svm_cf$overall["Accuracy"]
svm_cf$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9359493            0.4834184            0.8510299 
##       Neg Pred Value            Precision               Recall 
##            0.7053350            0.8510299            0.9359493 
##                   F1           Prevalence       Detection Rate 
##            0.8914719            0.7592138            0.7105856 
## Detection Prevalence    Balanced Accuracy 
##            0.8349713            0.7096838
# Precision, recall and F1 (elements 5 to 7 of the vector printed above),
# selected by name instead of by position.
svm_cf_pr_rec_f1 <- svm_cf$byClass[c("Precision", "Recall", "F1")]

##With TDA PCA filter 5 intervals, 50% overlap, 5 bins 
##Node1

# Radial-basis SVM for the TDA-assisted comparison, tuned over `svmGrid` with
# centring and scaling of the predictors.
# NOTE(review): this section is headed "Node1" and the result is stored in an
# `_n1_` object, yet the training data is `tda.m_adult_5.60.5.n2.vec` -- the
# same object the Node2 fit uses. Confirm whether a node-1 data set was meant.
# NOTE(review): `Importance` is presumably forwarded through `...` to the
# underlying SVM fitter; confirm that the svmRadial backend actually uses it.
Adult_TDA_PC_5.60.5_n1_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n2.vec,
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Trinadad.Tobago

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the tuning summary: resampled Accuracy and Kappa for every sigma / C
# pair in the grid, and the combination selected for the final model.
Adult_TDA_PC_5.60.5_n1_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 13933 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 9288, 9289, 9289 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa     
##    0.1   0.25  0.6877912  0.37316378
##    0.1   0.50  0.7017151  0.40186511
##    0.1   0.75  0.7032941  0.40552941
##    0.1   1.00  0.7054471  0.41015184
##    0.1   1.25  0.7061649  0.41173707
##    1.0   0.25  0.6186746  0.22944832
##    1.0   0.50  0.6460194  0.28662967
##    1.0   0.75  0.6576468  0.31122720
##    1.0   1.00  0.6642500  0.32544440
##    1.0   1.25  0.6653267  0.32791605
##   10.0   0.25  0.5252995  0.03411200
##   10.0   0.50  0.5498455  0.08575684
##   10.0   0.75  0.5718796  0.13249578
##   10.0   1.00  0.5919035  0.17516474
##   10.0   1.25  0.5985064  0.18909381
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.25.
# Per-fold resampling results of the selected model. The rows come back as
# Fold1, Fold3, Fold2 in the output below, not in numeric order.
Adult_TDA_PC_5.60.5_n1_SvmFit0$resample
##    Accuracy     Kappa Resample
## 1 0.7113025 0.4221246    Fold1
## 2 0.7047804 0.4090989    Fold3
## 3 0.7024117 0.4039877    Fold2
# Keep the accuracy column as a one-column data frame. It is selected by name
# so the extraction does not depend on column order.
ad_tda_pc_5.60.5_n1_svm_fit_re <-
  Adult_TDA_PC_5.60.5_n1_SvmFit0$resample["Accuracy"]

summary(Adult_TDA_PC_5.60.5_n1_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
#vip(Adult_TDA_PC_5.60.5_n1_SvmFit0,25) + ggtitle("Adult_TDA_PCA_5.60.5_n1_SvmFit TDA-Assisted Svm")

# Predict outcome using Adult_TDA_PC_5.60.5_n1_SvmFit0 from training data based on testing data
pred0 <- predict(
  Adult_TDA_PC_5.60.5_n1_SvmFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predicted classes against the observed test-set labels.
# The matrix is printed once; the original script printed the same object
# twice in a row, which only duplicated the output.
ad_tda_pc_5.60.5_n1_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test[["adult_df1"]])
)
ad_tda_pc_5.60.5_n1_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   1770   436
##      >50K    5646  1916
##                                           
##                Accuracy : 0.3774          
##                  95% CI : (0.3677, 0.3871)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.0303          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.2387          
##             Specificity : 0.8146          
##          Pos Pred Value : 0.8024          
##          Neg Pred Value : 0.2534          
##              Prevalence : 0.7592          
##          Detection Rate : 0.1812          
##    Detection Prevalence : 0.2258          
##       Balanced Accuracy : 0.5266          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the TDA-assisted SVM on the test set.
ad_tda_pc_5.60.5_n1_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##     0.37735463     0.03034731     0.36772860     0.38705383     0.75921376 
## AccuracyPValue  McnemarPValue 
##     1.00000000     0.00000000
# Accuracy is selected by name instead of by position.
ad_tda_pc_5.60.5_n1_svm_cf0_ov_acc <-
  ad_tda_pc_5.60.5_n1_svm_cf0$overall["Accuracy"]
ad_tda_pc_5.60.5_n1_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.2386731            0.8146259            0.8023572 
##       Neg Pred Value            Precision               Recall 
##            0.2533721            0.8023572            0.2386731 
##                   F1           Prevalence       Detection Rate 
##            0.3679069            0.7592138            0.1812039 
## Detection Prevalence    Balanced Accuracy 
##            0.2258395            0.5266495
# Precision, recall and F1 (elements 5 to 7 of the vector printed above),
# selected by name instead of by position.
ad_tda_pc_5.60.5_n1_svm_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n1_svm_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers

### 3-fold diff

# Fold-by-fold accuracy difference: baseline SVM resamples minus the
# TDA-assisted SVM resamples. The subtraction is positional; both resample
# tables were printed in the same fold order (Fold1, Fold3, Fold2).
diff_tda_pca_5.60.5_svm_n1_3_fold <-
  ad_svm_fit_re - ad_tda_pc_5.60.5_n1_svm_fit_re
diff_tda_pca_5.60.5_svm_n1_3_fold
##    Accuracy
## 1 0.1129703
## 2 0.1175413
## 3 0.1210945
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# The -0.01 / 0.01 arguments are presumably the lower and upper ROPE limits.

bst_tda_pca_5.60.5_svm.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_svm_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_svm.n1_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test
# Ratio of probLeft to probRight.

bst_tda_pca_5.60.5_svm.n1_3_fold_odds.left <-
  bst_tda_pca_5.60.5_svm.n1_3_fold[["probLeft"]] /
  bst_tda_pca_5.60.5_svm.n1_3_fold[["probRight"]]
bst_tda_pca_5.60.5_svm.n1_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

# Signed-rank test on the same fold differences, with the same -0.01 / 0.01
# bounds as the sign test.
bsr_tda_pca_5.60.5_svm.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_svm_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_svm.n1_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.0086
## 
## $winRight
## [1] 0.9914
# Bayesian Correlated Test
# NOTE(review): the second argument (0.1) is presumably the correlation between
# cross-validation folds. For k-fold CV it is commonly set to 1/k, which would
# be 1/3 for the 3-fold resampling used here -- confirm the intended value.

bct_tda_pca_5.60.5_svm.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_svm_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_svm.n1_3_fold
## $left
## [1] 0.000227652
## 
## $rope
## [1] 9.277757e-05
## 
## $right
## [1] 0.9996796
# Rope Plot
# Plot the share of the fold differences that falls inside [-0.01, 0.01].
plot(
  rope(
    diff_tda_pca_5.60.5_svm_n1_3_fold,
    range = c(-0.01, 0.01)
  )
)

#BayesFactor
#bf_tda_pca_5.60.5_rf.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_svm_n1_3_fold))
#bf_tda_pca_5.60.5_rf.n1_3_fold

# Frequentist reference: one-sample t-test of the fold differences against 0.
t.test(as.matrix(diff_tda_pca_5.60.5_svm_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.60.5_svm_n1_3_fold)
## t = 49.844, df = 2, p-value = 0.0004023
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.1070848 0.1273192
## sample estimates:
## mean of x 
##  0.117202
### Test set diff
# Test-set accuracy difference: baseline SVM accuracy minus the TDA-assisted
# SVM accuracy. This is a single named value, not one value per fold.
diff_tda_pca_5.60.5_svm.n1_test <-
  svm_cf_ov_acc - ad_tda_pc_5.60.5_n1_svm_cf0_ov_acc
diff_tda_pca_5.60.5_svm.n1_test
##  Accuracy 
## 0.4496314
## Bayesian Tests Test set diff

# Bayesian Sign Test
# NOTE(review): the input here is one observation only; interpret with care.

bst_tda_pca_5.60.5_svm.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_svm.n1_test),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_svm.n1_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# Ratio of probLeft to probRight.

bst_tda_pca_5.60.5_svm.n1_test_odds.left <-
  bst_tda_pca_5.60.5_svm.n1_test[["probLeft"]] /
  bst_tda_pca_5.60.5_svm.n1_test[["probRight"]]
bst_tda_pca_5.60.5_svm.n1_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

# Signed-rank test on the single test-set difference, with the same
# -0.01 / 0.01 bounds as the sign test.
bsr_tda_pca_5.60.5_svm.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_svm.n1_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_svm.n1_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1624
## 
## $winRight
## [1] 0.8376
# Bayesian Correlated Test
# NOTE(review): with a single difference this call returns NA for left, rope
# and right in the recorded run -- presumably because no spread can be
# estimated from one value. Consider dropping it for the test-set comparison.

bct_tda_pca_5.60.5_svm.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_svm.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_svm.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_svm.n1_test))

#BayesFactor
#bf_tda_pca_5.60.5_svm.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_svm.n1_test)) #bf_tda_pca_5.60.5_svm.n1_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_svm.n1_test))

##With TDA PCA filter 5 intervals, 50% overlap, 5 bins 
##Node2

# Fit a radial-kernel SVM (method "svmRadial") on the node-2 TDA subset
# tda.m_adult_5.60.5.n2.vec, with adult_df1 (as a factor) as the response and
# all other columns as predictors. Tuning candidates come from svmGrid,
# resampling from fitControl; predictors are centered and scaled, and the
# model is selected on Accuracy.
#
# Changes from the previous call: `T` is written as `TRUE` (T is an ordinary
# variable that can be reassigned) and the partially matched `preProc` is
# spelled out as `preProcess`.
#
# NOTE(review): this fit emits repeated "zero variances" warnings for constant
# dummy columns; consider adding "zv" to preProcess to drop them, after
# confirming the effect on the reported results.
# NOTE(review): `Importance` is forwarded through `...`; confirm the
# underlying SVM fitter actually uses it.
Adult_TDA_PC_5.60.5_n2_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n2.vec,
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted train object: resampled Accuracy/Kappa for every sigma/C
# candidate and the finally selected tuning values.
Adult_TDA_PC_5.60.5_n2_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 13933 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 9290, 9288, 9288 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa     
##    0.1   0.25  0.6891556  0.37591079
##    0.1   0.50  0.7011412  0.40072167
##    0.1   0.75  0.7058782  0.41058935
##    0.1   1.00  0.7088213  0.41682332
##    0.1   1.25  0.7093239  0.41798372
##    1.0   0.25  0.6184620  0.22902828
##    1.0   0.50  0.6447297  0.28393248
##    1.0   0.75  0.6573611  0.31064501
##    1.0   1.00  0.6647531  0.32642489
##    1.0   1.25  0.6632457  0.32368642
##   10.0   0.25  0.5244385  0.03227667
##   10.0   0.50  0.5483392  0.08266416
##   10.0   0.75  0.5705883  0.12985391
##   10.0   1.00  0.5898947  0.17099505
##   10.0   1.25  0.5962830  0.18448001
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.25.
# Per-fold resampling metrics of the selected n2 model.
# NOTE(review): the recorded rows come back as Fold1, Fold3, Fold2; any later
# row-by-row comparison with another fit's folds should confirm that the fold
# order matches.
Adult_TDA_PC_5.60.5_n2_SvmFit0$resample
##    Accuracy     Kappa Resample
## 1 0.7141934 0.4282358    Fold1
## 2 0.7063509 0.4118004    Fold3
## 3 0.7074273 0.4139150    Fold2
# Keep only the Accuracy column, as a one-column data frame, for the
# fold-wise comparison.
ad_tda_pc_5.60.5_n2_svm_fit_re <-
  Adult_TDA_PC_5.60.5_n2_SvmFit0$resample["Accuracy"]

# summary() of the train object; the recorded output reports a ksvm (S4) fit.
summary(Adult_TDA_PC_5.60.5_n2_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
# Variable-importance plot (disabled).
# vip(Adult_TDA_PC_5.60.5_n2_SvmFit0, 25) + ggtitle("Adult_TDA_PCA_5.60.5_n2_SvmFit TDA-Assisted Svm")

# Predict the outcome for the test data with the n2 model.
pred0 <- predict(
  Adult_TDA_PC_5.60.5_n2_SvmFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of the predictions against the test-set labels.
ad_tda_pc_5.60.5_n2_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n2_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   1770   436
##      >50K    5646  1916
##                                           
##                Accuracy : 0.3774          
##                  95% CI : (0.3677, 0.3871)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.0303          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.2387          
##             Specificity : 0.8146          
##          Pos Pred Value : 0.8024          
##          Neg Pred Value : 0.2534          
##              Prevalence : 0.7592          
##          Detection Rate : 0.1812          
##    Detection Prevalence : 0.2258          
##       Balanced Accuracy : 0.5266          
##                                           
##        'Positive' Class :  <=50K          
## 
# Prints the n2 confusion matrix a second time; the recorded output matches
# the print directly after the matrix was created.
ad_tda_pc_5.60.5_n2_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   1770   436
##      >50K    5646  1916
##                                           
##                Accuracy : 0.3774          
##                  95% CI : (0.3677, 0.3871)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.0303          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.2387          
##             Specificity : 0.8146          
##          Pos Pred Value : 0.8024          
##          Neg Pred Value : 0.2534          
##              Prevalence : 0.7592          
##          Detection Rate : 0.1812          
##    Detection Prevalence : 0.2258          
##       Balanced Accuracy : 0.5266          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the n2 confusion matrix.
# NOTE(review): the recorded test Accuracy (0.377) is below AccuracyNull
# (0.759) and far below the cross-validated accuracy; worth confirming the
# n2 model is being scored as intended.
ad_tda_pc_5.60.5_n2_svm_cf0[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##     0.37735463     0.03034731     0.36772860     0.38705383     0.75921376 
## AccuracyPValue  McnemarPValue 
##     1.00000000     0.00000000
# Keep the overall test accuracy (a named length-one vector).
ad_tda_pc_5.60.5_n2_svm_cf0_ov_acc <-
  ad_tda_pc_5.60.5_n2_svm_cf0[["overall"]]["Accuracy"]
# Per-class statistics, reported for the 'Positive' class (' <=50K').
ad_tda_pc_5.60.5_n2_svm_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##            0.2386731            0.8146259            0.8023572 
##       Neg Pred Value            Precision               Recall 
##            0.2533721            0.8023572            0.2386731 
##                   F1           Prevalence       Detection Rate 
##            0.3679069            0.7592138            0.1812039 
## Detection Prevalence    Balanced Accuracy 
##            0.2258395            0.5266495
# Keep precision, recall and F1 (elements 5 to 7 of byClass), selected by name.
ad_tda_pc_5.60.5_n2_svm_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n2_svm_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers

### 3-fold diff

# Per-fold accuracy gap: ad_svm_fit_re minus the n2 TDA-assisted SVM folds.
# NOTE(review): the subtraction appears to pair rows by position, not by fold
# label; confirm both operands list their folds in the same order.
diff_tda_pca_5.60.5_svm_n2_3_fold <-
  ad_svm_fit_re - ad_tda_pc_5.60.5_n2_svm_fit_re
diff_tda_pca_5.60.5_svm_n2_3_fold
##    Accuracy
## 1 0.1100793
## 2 0.1159707
## 3 0.1160788
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

# Sign test on the three per-fold differences with ROPE bounds -0.01 / 0.01.
bst_tda_pca_5.60.5_svm.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_svm_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_svm.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds of "left" versus "right" from the sign test (probLeft / probRight).

bst_tda_pca_5.60.5_svm.n2_3_fold_odds.left <- with(
  bst_tda_pca_5.60.5_svm.n2_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.60.5_svm.n2_3_fold_odds.left
## [1] 0
# Signed-rank test on the same per-fold differences, same ROPE bounds.

bsr_tda_pca_5.60.5_svm.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_svm_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_svm.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.008266667
## 
## $winRight
## [1] 0.9917333
# Correlated t-test on the same per-fold differences.
# NOTE(review): the second argument (0.1) is presumably the correlation
# between folds; confirm it is the intended value for 3-fold resampling.

bct_tda_pca_5.60.5_svm.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_svm_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_svm.n2_3_fold
## $left
## [1] 0.0001701296
## 
## $rope
## [1] 7.1642e-05
## 
## $right
## [1] 0.9997582
# Rope Plot
# Plot the ROPE summary of the n2 3-fold accuracy differences, using
# [-0.01, 0.01] as the ROPE interval.
plot(
  rope(
    diff_tda_pca_5.60.5_svm_n2_3_fold,
    c(-0.01, 0.01)
  )
)

# Bayes factor (disabled). The disabled name used "rf" although this section
# compares SVM fits, so it is spelled "svm" here.
# bf_tda_pca_5.60.5_svm.n2_3_fold <- ttestBF(x = as.matrix(diff_tda_pca_5.60.5_svm_n2_3_fold))
# bf_tda_pca_5.60.5_svm.n2_3_fold

# One-sample t-test of the per-fold differences against a mean of 0.
t.test(
  as.matrix(diff_tda_pca_5.60.5_svm_n2_3_fold)
)
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.60.5_svm_n2_3_fold)
## t = 57.537, df = 2, p-value = 0.0003019
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.1055148 0.1225711
## sample estimates:
## mean of x 
##  0.114043
### Test set diff
# Test-set accuracy gap: svm_cf_ov_acc minus the n2 TDA-assisted SVM accuracy.
# NOTE(review): svm_cf_ov_acc is presumably the non-TDA SVM's test accuracy;
# confirm where it is defined.
diff_tda_pca_5.60.5_svm.n2_test <-
  svm_cf_ov_acc - ad_tda_pc_5.60.5_n2_svm_cf0_ov_acc
diff_tda_pca_5.60.5_svm.n2_test
##  Accuracy 
## 0.4496314
## Bayesian Tests Test set diff

# Bayesian Sign Test

# Sign test on the single test-set difference with ROPE bounds -0.01 / 0.01.
bst_tda_pca_5.60.5_svm.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_svm.n2_test),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_svm.n2_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the sign test (probLeft / probRight).

bst_tda_pca_5.60.5_svm.n2_test_odds.left <- with(
  bst_tda_pca_5.60.5_svm.n2_test,
  probLeft / probRight
)
bst_tda_pca_5.60.5_svm.n2_test_odds.left
## [1] 0
# Signed-rank test on the same single difference, same ROPE bounds.

bsr_tda_pca_5.60.5_svm.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_svm.n2_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_svm.n2_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1609333
## 
## $winRight
## [1] 0.8390667
# Correlated t-test on the same single difference.
# NOTE(review): the recorded result of this call is NA for left/rope/right,
# presumably because one difference has no sample variance; confirm whether
# this test is meaningful on the test-set difference.

bct_tda_pca_5.60.5_svm.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_svm.n2_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_svm.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_svm.n2_test))

#BayesFactor
#bf_tda_pca_5.60.5_svm.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_svm.n2_test))
#bf_tda_pca_5.60.5_svm.n2_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_svm.n2_test))

##Node3

# Fit a radial-kernel SVM (method "svmRadial") on the node-3 TDA subset
# tda.m_adult_5.60.5.n3.vec, with adult_df1 (as a factor) as the response and
# all other columns as predictors. Tuning candidates come from svmGrid,
# resampling from fitControl; predictors are centered and scaled, and the
# model is selected on Accuracy.
#
# Changes from the previous call: `T` is written as `TRUE` (T is an ordinary
# variable that can be reassigned) and the partially matched `preProc` is
# spelled out as `preProcess`.
#
# NOTE(review): this fit emits repeated "zero variances" warnings for constant
# dummy columns; consider adding "zv" to preProcess to drop them, after
# confirming the effect on the reported results.
# NOTE(review): `Importance` is forwarded through `...`; confirm the
# underlying SVM fitter actually uses it.
Adult_TDA_PC_5.60.5_n3_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n3.vec,
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted train object: resampled Accuracy/Kappa for every sigma/C
# candidate and the finally selected tuning values.
Adult_TDA_PC_5.60.5_n3_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 15744 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 10496, 10497, 10495 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa        
##    0.1   0.25  0.7545733   0.1000473290
##    0.1   0.50  0.7630845   0.1739094144
##    0.1   0.75  0.7665781   0.2100243327
##    0.1   1.00  0.7670228   0.2286455925
##    0.1   1.25  0.7665782   0.2380677939
##    1.0   0.25  0.7458714   0.0187116472
##    1.0   0.50  0.7506985   0.0777461201
##    1.0   0.75  0.7520324   0.1087844313
##    1.0   1.00  0.7536838   0.1398155746
##    1.0   1.25  0.7516515   0.1536489238
##   10.0   0.25  0.7440930  -0.0001269622
##   10.0   0.50  0.7440930   0.0073361909
##   10.0   0.75  0.7440295   0.0241120566
##   10.0   1.00  0.7419969   0.0366308310
##   10.0   1.25  0.7399642   0.0451915066
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.
Adult_TDA_PC_5.60.5_n3_SvmFit0$resample
##    Accuracy     Kappa Resample
## 1 0.7682927 0.2349959    Fold1
## 2 0.7699638 0.2407153    Fold2
## 3 0.7628120 0.2102256    Fold3
ad_tda_pc_5.60.5_n3_svm_fit_re<-Adult_TDA_PC_5.60.5_n3_SvmFit0 $resample[1]

summary(Adult_TDA_PC_5.60.5_n3_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
#vip(Adult_TDA_PC_5.60.5_n3_SvmFit0,25) + ggtitle("Adult_TDA_PCA_5.60.5_n3_SvmFit TDA-Assited Svm")

# Score the held-out test set with the node-3 TDA-assisted SVM.
pred0 <- predict(
  Adult_TDA_PC_5.60.5_n3_SvmFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the true test labels.
ad_tda_pc_5.60.5_n3_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n3_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6786  1657
##      >50K     630   695
##                                           
##                Accuracy : 0.7659          
##                  95% CI : (0.7573, 0.7742)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.06312         
##                                           
##                   Kappa : 0.2474          
##                                           
##  Mcnemar's Test P-Value : < 2e-16         
##                                           
##             Sensitivity : 0.9150          
##             Specificity : 0.2955          
##          Pos Pred Value : 0.8037          
##          Neg Pred Value : 0.5245          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6947          
##    Detection Prevalence : 0.8644          
##       Balanced Accuracy : 0.6053          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.60.5_n3_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6786  1657
##      >50K     630   695
##                                           
##                Accuracy : 0.7659          
##                  95% CI : (0.7573, 0.7742)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.06312         
##                                           
##                   Kappa : 0.2474          
##                                           
##  Mcnemar's Test P-Value : < 2e-16         
##                                           
##             Sensitivity : 0.9150          
##             Specificity : 0.2955          
##          Pos Pred Value : 0.8037          
##          Neg Pred Value : 0.5245          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6947          
##    Detection Prevalence : 0.8644          
##       Balanced Accuracy : 0.6053          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.60.5_n3_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.658681e-01   2.474291e-01   7.573408e-01   7.742366e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   6.311549e-02  4.163612e-102
# Test-set accuracy of the node-3 model, kept as a named length-one vector.
ad_tda_pc_5.60.5_n3_svm_cf0_ov_acc <-
  ad_tda_pc_5.60.5_n3_svm_cf0$overall["Accuracy"]
ad_tda_pc_5.60.5_n3_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9150485            0.2954932            0.8037427 
##       Neg Pred Value            Precision               Recall 
##            0.5245283            0.8037427            0.9150485 
##                   F1           Prevalence       Detection Rate 
##            0.8557917            0.7592138            0.6947174 
## Detection Prevalence    Balanced Accuracy 
##            0.8643530            0.6052709
# Precision, recall and F1 (entries 5 to 7 of $byClass in the printout above).
# They are reported for the 'Positive' class shown in the confusion-matrix
# printout, i.e. ' <=50K'.
ad_tda_pc_5.60.5_n3_svm_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n3_svm_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers

### 3-fold diff

# Per-fold accuracy gap: ad_svm_fit_re minus the node-3 TDA-assisted fit, so a
# positive entry means ad_svm_fit_re scored higher on that row.
# NOTE(review): ad_svm_fit_re is defined earlier in the file; presumably it
# holds the non-TDA SVM baseline folds -- confirm. The node-3 fit was
# cross-validated on its own 15744-row training set (see the model printout
# above), so confirm that pairing its folds with the baseline's is meaningful.
diff_tda_pca_5.60.5_svm_n3_3_fold <-
  ad_svm_fit_re - ad_tda_pc_5.60.5_n3_svm_fit_re
diff_tda_pca_5.60.5_svm_n3_3_fold
##     Accuracy
## 1 0.05598006
## 2 0.05235787
## 3 0.06069422
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

bst_tda_pca_5.60.5_svm.n3_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.60.5_svm_n3_3_fold),-0.01,0.01)
bst_tda_pca_5.60.5_svm.n3_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test 

# Ratio of the sign test's left probability to its right probability.
# A right probability of zero would make this Inf (or NaN when both are zero).
bst_tda_pca_5.60.5_svm.n3_3_fold_odds.left <- with(
  bst_tda_pca_5.60.5_svm.n3_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.60.5_svm.n3_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.60.5_svm.n3_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.60.5_svm_n3_3_fold),-0.01,0.01)
bsr_tda_pca_5.60.5_svm.n3_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.009033333
## 
## $winRight
## [1] 0.9909667
# Bayesian Correlated Test

bct_tda_pca_5.60.5_svm.n3_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.60.5_svm_n3_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.60.5_svm.n3_3_fold
## $left
## [1] 0.0008798415
## 
## $rope
## [1] 0.0009182851
## 
## $right
## [1] 0.9982019
# Rope Plot
plot(rope(diff_tda_pca_5.60.5_svm_n3_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.60.5_rf.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_svm_n3_3_fold))
#bf_tda_pca_5.60.5_rf.n3_3_fold

#t_test
t.test(as.matrix(diff_tda_pca_5.60.5_svm_n3_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.60.5_svm_n3_3_fold)
## t = 23.347, df = 2, p-value = 0.00183
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.04596017 0.06672794
## sample estimates:
##  mean of x 
## 0.05634405
### Test set diff
diff_tda_pca_5.60.5_svm.n3_test<-(svm_cf_ov_acc - ad_tda_pc_5.60.5_n3_svm_cf0_ov_acc)
diff_tda_pca_5.60.5_svm.n3_test
##   Accuracy 
## 0.06111794
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_pca_5.60.5_svm.n3_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.60.5_svm.n3_test),-0.01,0.01)
bst_tda_pca_5.60.5_svm.n3_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.60.5_svm.n3_test_odds.left<-bst_tda_pca_5.60.5_svm.n3_test$probLeft/bst_tda_pca_5.60.5_svm.n3_test$probRight
bst_tda_pca_5.60.5_svm.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.60.5_svm.n3_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.60.5_svm.n3_test),-0.01,0.01)
bsr_tda_pca_5.60.5_svm.n3_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1582
## 
## $winRight
## [1] 0.8418
# Bayesian Correlated Test

# NOTE(review): the input is a single test-set accuracy difference, and the
# result printed below is NA for left, rope and right -- presumably because no
# spread can be estimated from one observation. Confirm whether this test is
# meant to be run on the test-set difference at all.
bct_tda_pca_5.60.5_svm.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_svm.n3_test),
  0.1, -0.01, 0.01
)
bct_tda_pca_5.60.5_svm.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_svm.n3_test)))

#BayesFactor
#bf_tda_pca_5.60.5_svm.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_svm.n3_test)) #bf_tda_pca_5.60.5_svm.n3_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_svm.n3_test))


##Node4

# Fit a radial-basis-function SVM (caret method "svmRadial") on the node-4 TDA
# training subset. Tuning candidates come from svmGrid, resampling from
# fitControl, and the final model is chosen by accuracy. Predictors are
# centered and scaled before fitting.
#
# Changes from the previous form, none of which alter the fit:
#   * `T` replaced by `TRUE` (`T` is an ordinary variable and can be reassigned).
#   * `preProc` spelled out as `preProcess` instead of relying on partial
#     argument matching.
#
# NOTE(review): `Importance` is not a named argument of caret::train(); it is
# forwarded through `...` to the underlying fitter. Presumably it is a
# leftover from the random-forest sections -- confirm it is ignored, then drop.
# NOTE(review): the warnings recorded below report a zero-variance predictor in
# this subset; adding "zv" to preProcess would drop such columns, but that is
# left unchanged here so results stay comparable with the other sections.
Adult_TDA_PC_5.60.5_n4_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n4.vec,
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
Adult_TDA_PC_5.60.5_n4_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 19829 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 13220, 13219, 13219 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa      
##    0.1   0.25  0.9351959  0.018364285
##    0.1   0.50  0.9364063  0.096513530
##    0.1   0.75  0.9360029  0.137415519
##    0.1   1.00  0.9354481  0.155378198
##    0.1   1.25  0.9354482  0.180673587
##    1.0   0.25  0.9351455  0.009329094
##    1.0   0.50  0.9353977  0.026900449
##    1.0   0.75  0.9360028  0.058350931
##    1.0   1.00  0.9361037  0.077788990
##    1.0   1.25  0.9355490  0.094435490
##   10.0   0.25  0.9351455  0.000000000
##   10.0   0.50  0.9351455  0.001346823
##   10.0   0.75  0.9354481  0.012684050
##   10.0   1.00  0.9352464  0.014884351
##   10.0   1.25  0.9348429  0.020516765
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 0.5.
Adult_TDA_PC_5.60.5_n4_SvmFit0$resample
##    Accuracy      Kappa Resample
## 1 0.9367625 0.09707067    Fold3
## 2 0.9366016 0.07198835    Fold1
## 3 0.9358548 0.12048157    Fold2
# Keep the per-fold cross-validation accuracies of the node-4 fit as a
# one-column data frame ("Accuracy" is the first column of $resample).
# NOTE(review): the rows stay in the order caret returned them, which the
# printout above shows as Fold3, Fold1, Fold2. The later subtraction from
# ad_svm_fit_re pairs rows by position, so confirm the baseline uses the same
# fold order (or sort both by Resample before differencing).
ad_tda_pc_5.60.5_n4_svm_fit_re <-
  Adult_TDA_PC_5.60.5_n4_SvmFit0$resample["Accuracy"]

summary(Adult_TDA_PC_5.60.5_n4_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
#vip(Adult_TDA_PC_5.60.5_n4_SvmFit0,25) + ggtitle("Adult_TDA_PCA_5.60.5_n4_SvmFit TDA-Assited Svm")

# Score the held-out test set with the node-4 TDA-assisted SVM.
pred0 <- predict(
  Adult_TDA_PC_5.60.5_n4_SvmFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the true test labels.
ad_tda_pc_5.60.5_n4_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n4_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7407  2296
##      >50K       9    56
##                                           
##                Accuracy : 0.764           
##                  95% CI : (0.7555, 0.7724)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.1355          
##                                           
##                   Kappa : 0.0338          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.99879         
##             Specificity : 0.02381         
##          Pos Pred Value : 0.76337         
##          Neg Pred Value : 0.86154         
##              Prevalence : 0.75921         
##          Detection Rate : 0.75829         
##    Detection Prevalence : 0.99335         
##       Balanced Accuracy : 0.51130         
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.60.5_n4_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7407  2296
##      >50K       9    56
##                                           
##                Accuracy : 0.764           
##                  95% CI : (0.7555, 0.7724)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.1355          
##                                           
##                   Kappa : 0.0338          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.99879         
##             Specificity : 0.02381         
##          Pos Pred Value : 0.76337         
##          Neg Pred Value : 0.86154         
##              Prevalence : 0.75921         
##          Detection Rate : 0.75829         
##    Detection Prevalence : 0.99335         
##       Balanced Accuracy : 0.51130         
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.60.5_n4_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##     0.76402539     0.03382565     0.75547581     0.77241727     0.75921376 
## AccuracyPValue  McnemarPValue 
##     0.13548167     0.00000000
# Test-set accuracy of the node-4 model, kept as a named length-one vector.
ad_tda_pc_5.60.5_n4_svm_cf0_ov_acc <-
  ad_tda_pc_5.60.5_n4_svm_cf0$overall["Accuracy"]
ad_tda_pc_5.60.5_n4_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##           0.99878641           0.02380952           0.76337215 
##       Neg Pred Value            Precision               Recall 
##           0.86153846           0.76337215           0.99878641 
##                   F1           Prevalence       Detection Rate 
##           0.86535428           0.75921376           0.75829238 
## Detection Prevalence    Balanced Accuracy 
##           0.99334562           0.51129797
# Precision, recall and F1 (entries 5 to 7 of $byClass in the printout above).
# They are reported for the 'Positive' class shown in the confusion-matrix
# printout, i.e. ' <=50K'.
ad_tda_pc_5.60.5_n4_svm_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n4_svm_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers

### 3-fold diff

# Per-fold accuracy gap: ad_svm_fit_re minus the node-4 TDA-assisted fit, so a
# negative entry means the node-4 fit scored higher on that row.
# NOTE(review): ad_svm_fit_re is defined earlier in the file; presumably it
# holds the non-TDA SVM baseline folds -- confirm. The node-4 fit was
# cross-validated on its own 19829-row training set (see the model printout
# above), so confirm that pairing its folds with the baseline's is meaningful.
diff_tda_pca_5.60.5_svm_n4_3_fold <-
  ad_svm_fit_re - ad_tda_pc_5.60.5_n4_svm_fit_re
diff_tda_pca_5.60.5_svm_n4_3_fold
##     Accuracy
## 1 -0.1124897
## 2 -0.1142799
## 3 -0.1123486
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

bst_tda_pca_5.60.5_svm.n4_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.60.5_svm_n4_3_fold),-0.01,0.01)
bst_tda_pca_5.60.5_svm.n4_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

# Ratio of the sign test's left probability to its right probability.
# The right probability is 0 in the printout above, so this evaluates to Inf
# (see the recorded output below).
bst_tda_pca_5.60.5_svm.n4_3_fold_odds.left <- with(
  bst_tda_pca_5.60.5_svm.n4_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.60.5_svm.n4_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_pca_5.60.5_svm.n4_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.60.5_svm_n4_3_fold),-0.01,0.01)
bsr_tda_pca_5.60.5_svm.n4_3_fold
## $winLeft
## [1] 0.9913667
## 
## $winRope
## [1] 0.008633333
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

bct_tda_pca_5.60.5_svm.n4_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.60.5_svm_n4_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.60.5_svm.n4_3_fold
## $left
## [1] 0.9999757
## 
## $rope
## [1] 7.245483e-06
## 
## $right
## [1] 1.701437e-05
# Rope Plot
plot(rope(diff_tda_pca_5.60.5_svm_n4_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.60.5_rf.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_svm_n4_3_fold))
#bf_tda_pca_5.60.5_rf.n4_3_fold

#t_test
t.test(as.matrix(diff_tda_pca_5.60.5_svm_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.60.5_svm_n4_3_fold)
## t = -181.85, df = 2, p-value = 3.024e-05
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1157139 -0.1103649
## sample estimates:
##  mean of x 
## -0.1130394
### Test set diff
diff_tda_pca_5.60.5_svm.n4_test<-(svm_cf_ov_acc - ad_tda_pc_5.60.5_n4_svm_cf0_ov_acc)
diff_tda_pca_5.60.5_svm.n4_test
##   Accuracy 
## 0.06296069
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_pca_5.60.5_svm.n4_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.60.5_svm.n4_test),-0.01,0.01)
bst_tda_pca_5.60.5_svm.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.60.5_svm.n4_test_odds.left<-bst_tda_pca_5.60.5_svm.n4_test$probLeft/bst_tda_pca_5.60.5_svm.n4_test$probRight
bst_tda_pca_5.60.5_svm.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.60.5_svm.n4_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.60.5_svm.n4_test),-0.01,0.01)
bsr_tda_pca_5.60.5_svm.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1584333
## 
## $winRight
## [1] 0.8415667
# Bayesian Correlated Test

# NOTE(review): the input is a single test-set accuracy difference, and the
# result printed below is NA for left, rope and right -- presumably because no
# spread can be estimated from one observation. Confirm whether this test is
# meant to be run on the test-set difference at all.
bct_tda_pca_5.60.5_svm.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_svm.n4_test),
  0.1, -0.01, 0.01
)
bct_tda_pca_5.60.5_svm.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_svm.n4_test)))

#BayesFactor
#bf_tda_pca_5.60.5_svm.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_svm.n4_test)) #bf_tda_pca_5.60.5_svm.n4_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_svm.n4_test))

##Node5

# Fit a radial-basis-function SVM (caret method "svmRadial") on the node-5 TDA
# training subset. Tuning candidates come from svmGrid, resampling from
# fitControl, and the final model is chosen by accuracy. Predictors are
# centered and scaled before fitting.
#
# Changes from the previous form, none of which alter the fit:
#   * `T` replaced by `TRUE` (`T` is an ordinary variable and can be reassigned).
#   * `preProc` spelled out as `preProcess` instead of relying on partial
#     argument matching.
#
# NOTE(review): `Importance` is not a named argument of caret::train(); it is
# forwarded through `...` to the underlying fitter. Presumably it is a
# leftover from the random-forest sections -- confirm it is ignored, then drop.
# NOTE(review): the warnings recorded below report a zero-variance predictor in
# this subset; adding "zv" to preProcess would drop such columns, but that is
# left unchanged here so results stay comparable with the other sections.
Adult_TDA_PC_5.60.5_n5_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n5.vec,
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
Adult_TDA_PC_5.60.5_n5_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 16508 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 11005, 11005, 11006 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa        
##    0.1   0.25  0.9921250   0.0000000000
##    0.1   0.50  0.9921250   0.0000000000
##    0.1   0.75  0.9921250   0.0000000000
##    0.1   1.00  0.9920645  -0.0001184348
##    0.1   1.25  0.9919433  -0.0003501805
##    1.0   0.25  0.9921250   0.0000000000
##    1.0   0.50  0.9921250   0.0000000000
##    1.0   0.75  0.9921250   0.0000000000
##    1.0   1.00  0.9920645  -0.0001184960
##    1.0   1.25  0.9920039   0.0137398679
##   10.0   0.25  0.9921250   0.0000000000
##   10.0   0.50  0.9921250   0.0000000000
##   10.0   0.75  0.9921250   0.0000000000
##   10.0   1.00  0.9921250   0.0000000000
##   10.0   1.25  0.9920645  -0.0001184348
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 10 and C = 0.25.
Adult_TDA_PC_5.60.5_n5_SvmFit0$resample
##    Accuracy Kappa Resample
## 1 0.9920044     0    Fold1
## 2 0.9921861     0    Fold2
## 3 0.9921847     0    Fold3
# Keep the per-fold cross-validation accuracies of the node-5 fit as a
# one-column data frame ("Accuracy" is the first column of $resample, as the
# printout above shows).
ad_tda_pc_5.60.5_n5_svm_fit_re <-
  Adult_TDA_PC_5.60.5_n5_SvmFit0$resample["Accuracy"]

summary(Adult_TDA_PC_5.60.5_n5_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
#vip(Adult_TDA_PC_5.60.5_n5_SvmFit0,25) + ggtitle("Adult_TDA_PCA_5.60.5_n5_SvmFit TDA-Assited Svm")

# Score the held-out test set with the node-5 TDA-assisted SVM.
pred0 <- predict(
  Adult_TDA_PC_5.60.5_n5_SvmFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the true test labels.
ad_tda_pc_5.60.5_n5_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n5_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.60.5_n5_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.60.5_n5_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.7592138      0.0000000      0.7506071      0.7676657      0.7592138 
## AccuracyPValue  McnemarPValue 
##      0.5055358      0.0000000
# Test-set accuracy of the node-5 model, kept as a named length-one vector.
ad_tda_pc_5.60.5_n5_svm_cf0_ov_acc <-
  ad_tda_pc_5.60.5_n5_svm_cf0$overall["Accuracy"]
ad_tda_pc_5.60.5_n5_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            1.0000000            0.0000000            0.7592138 
##       Neg Pred Value            Precision               Recall 
##                  NaN            0.7592138            1.0000000 
##                   F1           Prevalence       Detection Rate 
##            0.8631285            0.7592138            0.7592138 
## Detection Prevalence    Balanced Accuracy 
##            1.0000000            0.5000000
# Precision, recall and F1 (entries 5 to 7 of $byClass in the printout above).
# They are reported for the 'Positive' class shown in the confusion-matrix
# printout, i.e. ' <=50K'; that printout also shows this model never
# predicts ' >50K' on the test set.
ad_tda_pc_5.60.5_n5_svm_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n5_svm_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers

### 3-fold diff

# Per-fold accuracy gap: ad_svm_fit_re minus the node-5 TDA-assisted fit, so a
# negative entry means the node-5 fit scored higher on that row.
# NOTE(review): ad_svm_fit_re is defined earlier in the file; presumably it
# holds the non-TDA SVM baseline folds -- confirm. The node-5 fit was
# cross-validated on its own 16508-row training set (see the model printout
# above), so confirm that pairing its folds with the baseline's is meaningful.
diff_tda_pca_5.60.5_svm_n5_3_fold <-
  ad_svm_fit_re - ad_tda_pc_5.60.5_n5_svm_fit_re
diff_tda_pca_5.60.5_svm_n5_3_fold
##     Accuracy
## 1 -0.1677316
## 2 -0.1698644
## 3 -0.1686785
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

bst_tda_pca_5.60.5_svm.n5_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.60.5_svm_n5_3_fold),-0.01,0.01)
bst_tda_pca_5.60.5_svm.n5_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

# Ratio of the sign test's left probability to its right probability.
# The right probability is 0 in the printout above, so this evaluates to Inf
# (see the recorded output below).
bst_tda_pca_5.60.5_svm.n5_3_fold_odds.left <- with(
  bst_tda_pca_5.60.5_svm.n5_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.60.5_svm.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_pca_5.60.5_svm.n5_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.60.5_svm_n5_3_fold),-0.01,0.01)
bsr_tda_pca_5.60.5_svm.n5_3_fold
## $winLeft
## [1] 0.9901333
## 
## $winRope
## [1] 0.009866667
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

bct_tda_pca_5.60.5_svm.n5_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.60.5_svm_n5_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.60.5_svm.n5_3_fold
## $left
## [1] 0.9999899
## 
## $rope
## [1] 2.126858e-06
## 
## $right
## [1] 7.941433e-06
# Rope Plot
plot(rope(diff_tda_pca_5.60.5_svm_n5_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.60.5_rf.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_svm_n5_3_fold))
#bf_tda_pca_5.60.5_rf.n5_3_fold

#t_test
t.test(as.matrix(diff_tda_pca_5.60.5_svm_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.60.5_svm_n5_3_fold)
## t = -273.53, df = 2, p-value = 1.337e-05
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1714128 -0.1661036
## sample estimates:
##  mean of x 
## -0.1687582
### Test set diff
diff_tda_pca_5.60.5_svm.n5_test<-(svm_cf_ov_acc - ad_tda_pc_5.60.5_n5_svm_cf0_ov_acc)
diff_tda_pca_5.60.5_svm.n5_test
##   Accuracy 
## 0.06777232
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_pca_5.60.5_svm.n5_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.60.5_svm.n5_test),-0.01,0.01)
bst_tda_pca_5.60.5_svm.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.60.5_svm.n5_test_odds.left<-bst_tda_pca_5.60.5_svm.n5_test$probLeft/bst_tda_pca_5.60.5_svm.n5_test$probRight
bst_tda_pca_5.60.5_svm.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.60.5_svm.n5_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.60.5_svm.n5_test),-0.01,0.01)
bsr_tda_pca_5.60.5_svm.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1589333
## 
## $winRight
## [1] 0.8410667
# Bayesian Correlated Test

# NOTE(review): the input is a single test-set accuracy difference, and the
# result printed below is NA for left, rope and right -- presumably because no
# spread can be estimated from one observation. Confirm whether this test is
# meant to be run on the test-set difference at all.
bct_tda_pca_5.60.5_svm.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_svm.n5_test),
  0.1, -0.01, 0.01
)
bct_tda_pca_5.60.5_svm.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_svm.n5_test)))

#BayesFactor
#bf_tda_pca_5.60.5_svm.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_svm.n5_test)) #bf_tda_pca_5.60.5_svm.n5_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_svm.n5_test))


##With TDA KDE filter 5 intervals, 50% overlap, 5 bins
##Node1
# NOTE(review): the object names in this section carry the tag "5.60.5" while
# the header above says 50% overlap -- confirm which overlap was actually used.

# Tune a radial-kernel SVM (caret method "svmRadial") on the node-1 KDE data
# set. Resampling scheme and tuning grid come from `fitControl` and `svmGrid`,
# both defined elsewhere in this script; predictors are centred and scaled, and
# the model is selected on Accuracy.
#
# The recorded run emits repeated "zero variances" warnings from preProcess.
# Adding "zv" to `preProcess` would drop such columns, but that changes the
# predictor set, so it is deliberately not done here.
Adult_TDA_KDE_5.60.5_n1_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n1.vec,
  # `Importance` is not a named train() argument, so it is forwarded via `...`.
  # NOTE(review): confirm it has any effect for svmRadial; kept for parity
  # with the other fits. TRUE replaces the reassignable shorthand T.
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  # Full argument name instead of the partially matched `preProc`.
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Outlying.US.Guam.USVI.etc.

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted train object: resampling results for each sigma/C pair and
# the finally selected tuning values.
Adult_TDA_KDE_5.60.5_n1_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 15260 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 10173, 10173, 10174 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa      
##    0.1   0.25  0.7984271  0.354881307
##    0.1   0.50  0.8079291  0.410298159
##    0.1   0.75  0.8141544  0.444011743
##    0.1   1.00  0.8157927  0.458883476
##    0.1   1.25  0.8158585  0.465014365
##    1.0   0.25  0.7637615  0.146797472
##    1.0   0.50  0.7745738  0.216335733
##    1.0   0.75  0.7809958  0.263017354
##    1.0   1.00  0.7844034  0.294498887
##    1.0   1.25  0.7868936  0.317219627
##   10.0   0.25  0.7401704  0.000745815
##   10.0   0.50  0.7427261  0.018764032
##   10.0   0.75  0.7460681  0.045819372
##   10.0   1.00  0.7506553  0.080662433
##   10.0   1.25  0.7524246  0.102486413
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.25.
# Per-fold Accuracy and Kappa for the selected tuning values.
Adult_TDA_KDE_5.60.5_n1_SvmFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8181639 0.4777044    Fold1
## 2 0.8161620 0.4615179    Fold3
## 3 0.8132495 0.4558208    Fold2
# Keep only the Accuracy column (still a one-column data frame) for the
# fold-wise comparison later in this section. Selected by name, not position.
ad_tda_kde_5.60.5_n1_svm_fit_re <-
  Adult_TDA_KDE_5.60.5_n1_SvmFit0$resample["Accuracy"]

# Summarise the KDE node-1 fit. This call previously pointed at
# Adult_TDA_PC_5.60.5_n1_SvmFit0 (the PCA model), a copy-paste slip.
summary(Adult_TDA_KDE_5.60.5_n1_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
#vip(Adult_TDA_KDE_5.60.5_n1_SvmFit0,25) + ggtitle("Adult_TDA_KDE_5.60.5_n1_SvmFit TDA-Assited Svm")

# Predict the test-set outcome with the node-1 KDE SVM fit.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n1_SvmFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of the predictions against the observed test-set labels.
ad_tda_kde_5.60.5_n1_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n1_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6989   937
##      >50K     427  1415
##                                           
##                Accuracy : 0.8604          
##                  95% CI : (0.8533, 0.8672)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5875          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9424          
##             Specificity : 0.6016          
##          Pos Pred Value : 0.8818          
##          Neg Pred Value : 0.7682          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7155          
##    Detection Prevalence : 0.8114          
##       Balanced Accuracy : 0.7720          
##                                           
##        'Positive' Class :  <=50K          
## 
# Prints the same confusion-matrix object a second time; the output is
# identical to the print immediately before it.
ad_tda_kde_5.60.5_n1_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6989   937
##      >50K     427  1415
##                                           
##                Accuracy : 0.8604          
##                  95% CI : (0.8533, 0.8672)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5875          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9424          
##             Specificity : 0.6016          
##          Pos Pred Value : 0.8818          
##          Neg Pred Value : 0.7682          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7155          
##    Detection Prevalence : 0.8114          
##       Balanced Accuracy : 0.7720          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the node-1 KDE SVM confusion matrix.
ad_tda_kde_5.60.5_n1_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.603604e-01   5.875345e-01   8.533280e-01   8.671771e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  4.532756e-136   3.273206e-43
# Test-set accuracy, kept as a named length-one vector ("Accuracy" is the
# first element of $overall, as printed above).
ad_tda_kde_5.60.5_n1_svm_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n1_svm_cf0$overall["Accuracy"]
# Per-class statistics; the confusion-matrix print reports ' <=50K' as the
# positive class, so these figures refer to that class.
ad_tda_kde_5.60.5_n1_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9424218            0.6016156            0.8817815 
##       Neg Pred Value            Precision               Recall 
##            0.7681868            0.8817815            0.9424218 
##                   F1           Prevalence       Detection Rate 
##            0.9110937            0.7592138            0.7154996 
## Detection Prevalence    Balanced Accuracy 
##            0.8114251            0.7720187
# Precision, Recall and F1 are elements 5 to 7 of $byClass.
ad_tda_kde_5.60.5_n1_svm_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n1_svm_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers

### 3-fold diff
# Per-fold accuracy in ad_svm_fit_re minus per-fold accuracy of the node-1 KDE
# fit. The two one-column data frames are subtracted row by row.
# NOTE(review): rows are paired by position, not by fold label -- confirm both
# fits list their folds in the same order.

diff_tda_kde_5.60.5_svm_n1_3_fold <-
  ad_svm_fit_re - ad_tda_kde_5.60.5_n1_svm_fit_re
diff_tda_kde_5.60.5_svm_n1_3_fold
##      Accuracy
## 1 0.006108792
## 2 0.006159650
## 3 0.010256726
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# The last two arguments are the ROPE bounds (-0.01, 0.01).

bst_tda_kde_5.60.5_svm.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_svm_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_svm.n1_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.75
## 
## $probRight
## [1] 0.25
# Odds Left Bayesian Sign Test 
# Ratio probLeft / probRight from the sign-test result.

bst_tda_kde_5.60.5_svm.n1_3_fold_odds.left <- with(
  bst_tda_kde_5.60.5_svm.n1_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.60.5_svm.n1_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.60.5_svm.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_svm_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_svm.n1_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.9527667
## 
## $winRight
## [1] 0.04723333
# Bayesian Correlated Test
# NOTE(review): 0.1 is presumably the correlation (rho) argument -- confirm
# against the definition of correlatedBayesianTtest.

bct_tda_kde_5.60.5_svm.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_svm_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_svm.n1_3_fold
## $left
## [1] 0.004057276
## 
## $rope
## [1] 0.8674647
## 
## $right
## [1] 0.128478
# ROPE plot of the fold-wise differences with the same +/-0.01 bounds
plot(rope(diff_tda_kde_5.60.5_svm_n1_3_fold, range = c(-0.01, 0.01)))

# Bayes factor (disabled)
# bf_tda_kde_5.60.5_svm.n1_3_fold <- ttestBF(x = as.matrix(diff_tda_kde_5.60.5_svm_n1_3_fold))
# bf_tda_kde_5.60.5_svm.n1_3_fold

# One-sample t-test on the fold-wise differences
t.test(x = as.matrix(diff_tda_kde_5.60.5_svm_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.60.5_svm_n1_3_fold)
## t = 5.4636, df = 2, p-value = 0.0319
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.001595482 0.013421297
## sample estimates:
##  mean of x 
## 0.00750839
### Test set diff
# Test-set accuracy stored in svm_cf_ov_acc minus the test-set accuracy of the
# node-1 KDE SVM.
diff_tda_kde_5.60.5_svm.n1_test <-
  svm_cf_ov_acc - ad_tda_kde_5.60.5_n1_svm_cf0_ov_acc
diff_tda_kde_5.60.5_svm.n1_test
##    Accuracy 
## -0.03337428
## Bayesian Tests Test set diff
# NOTE(review): the difference above is a single number, so every test in this
# sub-section runs on one observation; read the results with that in mind.

# Bayesian Sign Test

bst_tda_kde_5.60.5_svm.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_svm.n1_test),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_svm.n1_test
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 
# Ratio probLeft / probRight; probRight is 0 in the recorded run, so the
# ratio prints as Inf.

bst_tda_kde_5.60.5_svm.n1_test_odds.left <- with(
  bst_tda_kde_5.60.5_svm.n1_test,
  probLeft / probRight
)
bst_tda_kde_5.60.5_svm.n1_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_kde_5.60.5_svm.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_svm.n1_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_svm.n1_test
## $winLeft
## [1] 0.8396
## 
## $winRope
## [1] 0.1604
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# All three components come back NA in the recorded run below.
# NOTE(review): presumably because one observation has no variance estimate.

bct_tda_kde_5.60.5_svm.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_svm.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_svm.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope plot (disabled)
# plot(rope(diff_tda_kde_5.60.5_svm.n1_test))

# Bayes factor (disabled)
# bf_tda_kde_5.60.5_svm.n1_test <- ttestBF(x = as.matrix(diff_tda_kde_5.60.5_svm.n1_test))
# bf_tda_kde_5.60.5_svm.n1_test

# t-test (disabled)
# t.test(as.matrix(diff_tda_kde_5.60.5_svm.n1_test))


##With TDA KDE filter 5 intervals, 50% overlap, 5 bins
##Node2
# The header previously said "PCA", but this section fits the KDE node-2 data
# (tda.m_kde_adult_5.60.5.n2.vec).
# NOTE(review): the object names carry the tag "5.60.5" while the header says
# 50% overlap -- confirm which overlap was actually used.

# Tune a radial-kernel SVM (caret method "svmRadial") on the node-2 KDE data
# set. Resampling scheme and tuning grid come from `fitControl` and `svmGrid`,
# both defined elsewhere in this script; predictors are centred and scaled, and
# the model is selected on Accuracy.
Adult_TDA_KDE_5.60.5_n2_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n2.vec,
  # `Importance` is not a named train() argument, so it is forwarded via `...`.
  # NOTE(review): confirm it has any effect for svmRadial; kept for parity
  # with the other fits. TRUE replaces the reassignable shorthand T.
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  # Full argument name instead of the partially matched `preProc`.
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted train object: resampling results for each sigma/C pair and
# the finally selected tuning values.
Adult_TDA_KDE_5.60.5_n2_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 14482 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 9655, 9655, 9654 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa      
##    0.1   0.25  0.7926395  0.401702058
##    0.1   0.50  0.8007185  0.444429113
##    0.1   0.75  0.8038256  0.462387444
##    0.1   1.00  0.8054830  0.475690686
##    0.1   1.25  0.8057592  0.480563318
##    1.0   0.25  0.7466509  0.164469866
##    1.0   0.50  0.7651567  0.261455005
##    1.0   0.75  0.7701283  0.300833599
##    1.0   1.00  0.7739262  0.329642193
##    1.0   1.25  0.7762052  0.347570793
##   10.0   0.25  0.7185472  0.001407236
##   10.0   0.50  0.7235188  0.029221499
##   10.0   0.75  0.7283524  0.064216885
##   10.0   1.00  0.7328408  0.098316679
##   10.0   1.25  0.7347052  0.118500663
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.25.
Adult_TDA_KDE_5.60.5_n2_SvmFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8062979 0.4826868    Fold1
## 2 0.8003314 0.4662903    Fold3
## 3 0.8106484 0.4927129    Fold2
ad_tda_kde_5.60.5_n2_svm_fit_re<-Adult_TDA_KDE_5.60.5_n2_SvmFit0$resample[1]

summary(Adult_TDA_KDE_5.60.5_n2_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
# Variable-importance plot (disabled):
# vip(Adult_TDA_KDE_5.60.5_n2_SvmFit0, 25) + ggtitle("Adult_TDA_KDE_5.60.5_n2_SvmFit TDA-Assisted Svm")

# Score the held-out test data with the node-2 SVM.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n2_SvmFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the true test labels and print the
# resulting statistics.
# NOTE(review): the printed output reports ' <=50K' as the 'Positive' class,
# so Precision/Recall/F1 describe that class; pass `positive =` if the
# ' >50K' class is the one of interest.
ad_tda_kde_5.60.5_n2_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n2_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7030   926
##      >50K     386  1426
##                                           
##                Accuracy : 0.8657          
##                  95% CI : (0.8588, 0.8724)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.6014          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9480          
##             Specificity : 0.6063          
##          Pos Pred Value : 0.8836          
##          Neg Pred Value : 0.7870          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7197          
##    Detection Prevalence : 0.8145          
##       Balanced Accuracy : 0.7771          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): prints the same confusion matrix a second time; the object is
# unchanged since it was created, so this output duplicates the previous one.
ad_tda_kde_5.60.5_n2_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7030   926
##      >50K     386  1426
##                                           
##                Accuracy : 0.8657          
##                  95% CI : (0.8588, 0.8724)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.6014          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9480          
##             Specificity : 0.6063          
##          Pos Pred Value : 0.8836          
##          Neg Pred Value : 0.7870          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7197          
##    Detection Prevalence : 0.8145          
##       Balanced Accuracy : 0.7771          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics of the node-2 confusion matrix.
ad_tda_kde_5.60.5_n2_svm_cf0[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.656839e-01   6.013843e-01   8.587614e-01   8.723875e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  1.021665e-151   4.402141e-50
# Store the test accuracy (first entry of `overall`) as a named numeric.
ad_tda_kde_5.60.5_n2_svm_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n2_svm_cf0[["overall"]]["Accuracy"]
# Per-class statistics.
ad_tda_kde_5.60.5_n2_svm_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9479504            0.6062925            0.8836099 
##       Neg Pred Value            Precision               Recall 
##            0.7869757            0.8836099            0.9479504 
##                   F1           Prevalence       Detection Rate 
##            0.9146500            0.7592138            0.7196970 
## Detection Prevalence    Balanced Accuracy 
##            0.8144963            0.7771214
# Entries 5 to 7 of `byClass`, selected by name: Precision, Recall, F1.
ad_tda_kde_5.60.5_n2_svm_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n2_svm_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers

### 3-fold diff

# Per-fold accuracy difference: `ad_svm_fit_re` minus the node-2 TDA-assisted
# SVM, so positive values mean the first operand scored higher.
# NOTE(review): rows are paired by position, not by fold label -- verify both
# data frames list the folds in the same order.
diff_tda_kde_5.60.5_svm_n2_3_fold <-
  ad_svm_fit_re - ad_tda_kde_5.60.5_n2_svm_fit_re
diff_tda_kde_5.60.5_svm_n2_3_fold
##     Accuracy
## 1 0.01797483
## 2 0.02199026
## 3 0.01285775
## Bayesian Tests 3-fold diff

# Bayesian sign test on the three fold-wise differences.
# The bounds -0.01 / 0.01 are presumably the ROPE limits (the result carries a
# `probRope` element) -- confirm against the test implementation.

bst_tda_kde_5.60.5_svm.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_svm_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_svm.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds of "left" against "right" from the sign test (probLeft / probRight).

bst_tda_kde_5.60.5_svm.n2_3_fold_odds.left <-
  bst_tda_kde_5.60.5_svm.n2_3_fold[["probLeft"]] /
  bst_tda_kde_5.60.5_svm.n2_3_fold[["probRight"]]
bst_tda_kde_5.60.5_svm.n2_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the fold-wise differences (same bounds as the
# sign test).
# NOTE(review): the reported values look sampling-based; if so, set a seed
# beforehand so the numbers are reproducible -- confirm.

bsr_tda_kde_5.60.5_svm.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_svm_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_svm.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.0901
## 
## $winRight
## [1] 0.9099
# Bayesian correlated t-test on the same differences.
# NOTE(review): if the second argument (0.1) is the fold correlation rho,
# the usual choice is the test fraction 1/k, i.e. 1/3 for 3-fold CV -- confirm.

bct_tda_kde_5.60.5_svm.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_svm_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_svm.n2_3_fold
## $left
## [1] 0.005999024
## 
## $rope
## [1] 0.05910161
## 
## $right
## [1] 0.9348994
# ROPE plot of the fold-wise differences for the interval [-0.01, 0.01].
plot(
  rope(diff_tda_kde_5.60.5_svm_n2_3_fold, c(-0.01, 0.01))
)

# Bayes factor (disabled):
# bf_tda_kde_5.60.5_svm.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_svm_n2_3_fold))
# bf_tda_kde_5.60.5_svm.n2_3_fold

# One-sample t-test of the differences against a mean of zero (df = 2 with
# three folds, see output).
t.test(
  as.matrix(diff_tda_kde_5.60.5_svm_n2_3_fold)
)
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.60.5_svm_n2_3_fold)
## t = 6.6627, df = 2, p-value = 0.02179
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.006236928 0.028978302
## sample estimates:
##  mean of x 
## 0.01760762
### Test set diff
# Single test-set accuracy difference: `svm_cf_ov_acc` minus the node-2
# TDA-assisted SVM accuracy (negative means the node-2 model scored higher).
diff_tda_kde_5.60.5_svm.n2_test <-
  svm_cf_ov_acc - ad_tda_kde_5.60.5_n2_svm_cf0_ov_acc
diff_tda_kde_5.60.5_svm.n2_test
##    Accuracy 
## -0.03869779
## Bayesian Tests Test set diff

# Bayesian sign test on the test-set difference.
# NOTE(review): the input is a single value, so the result (0.5 / 0.5 / 0)
# carries very little information -- interpret with care.

bst_tda_kde_5.60.5_svm.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_svm.n2_test),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_svm.n2_test
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds of "left" against "right" from the sign test (probLeft / probRight).
# probRight is 0 here, so the ratio evaluates to Inf.

bst_tda_kde_5.60.5_svm.n2_test_odds.left <-
  bst_tda_kde_5.60.5_svm.n2_test[["probLeft"]] /
  bst_tda_kde_5.60.5_svm.n2_test[["probRight"]]
bst_tda_kde_5.60.5_svm.n2_test_odds.left
## [1] Inf
# Bayesian signed-rank test on the single test-set difference.

bsr_tda_kde_5.60.5_svm.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_svm.n2_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_svm.n2_test
## $winLeft
## [1] 0.8402667
## 
## $winRope
## [1] 0.1597333
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the single test-set difference.
# NOTE(review): with one observation this returns NA for left/rope/right
# (see output); the call is kept only for symmetry with the 3-fold section.

bct_tda_kde_5.60.5_svm.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_svm.n2_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_svm.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_svm.n2_test))

#BayesFactor
#bf_tda_kde_5.60.5_svm.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_svm.n2_test)) #bf_tda_kde_5.60.5_svm.n2_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_svm.n2_test))

## Node3

# Tune and fit a radial-basis SVM on the node-3 training subset.
# `fitControl` (resampling scheme) and `svmGrid` (sigma/C grid) are defined
# earlier in the file; predictors are centered and scaled before fitting and
# the grid point with the highest resampled Accuracy is selected.
# `TRUE` replaces the reassignable alias `T`, and `preProcess` is spelled out
# instead of relying on partial argument matching of `preProc`.
# NOTE(review): `Importance` is forwarded through `...` to the underlying
# fitting routine -- confirm it has any effect for method "svmRadial".
# NOTE(review): this call emits repeated "zero variances" warnings for several
# one-hot columns; adding "zv" to `preProcess` would drop them, but that
# changes the predictor set and may change the results.
Adult_TDA_KDE_5.60.5_n3_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n3.vec,
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted node-3 SVM object; the tuning-grid results follow as output.
Adult_TDA_KDE_5.60.5_n3_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 13266 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8844, 8844, 8844 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa      
##    0.1   0.25  0.7893864  0.368408514
##    0.1   0.50  0.8004674  0.424802870
##    0.1   0.75  0.8049148  0.447616022
##    0.1   1.00  0.8052917  0.455670488
##    0.1   1.25  0.8050656  0.459167321
##    1.0   0.25  0.7561435  0.170952931
##    1.0   0.50  0.7658676  0.240972544
##    1.0   0.75  0.7739334  0.289713208
##    1.0   1.00  0.7785316  0.321958954
##    1.0   1.25  0.7802653  0.338553037
##   10.0   0.25  0.7299864  0.001732385
##   10.0   0.50  0.7324740  0.023796311
##   10.0   0.75  0.7358661  0.052819809
##   10.0   1.00  0.7411428  0.093118355
##   10.0   1.25  0.7421981  0.112321505
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.
# Per-fold resampling metrics of the selected node-3 model.
Adult_TDA_KDE_5.60.5_n3_SvmFit0[["resample"]]
##    Accuracy     Kappa Resample
## 1 0.7991859 0.4436653    Fold1
## 2 0.8095884 0.4657166    Fold2
## 3 0.8071009 0.4576296    Fold3
# Keep only the Accuracy column (still a one-column data frame) for the
# fold-wise comparison further down.
# NOTE(review): the later subtraction pairs rows by position -- confirm the
# baseline uses the same fold order (and the same folds).
ad_tda_kde_5.60.5_n3_svm_fit_re <-
  Adult_TDA_KDE_5.60.5_n3_SvmFit0[["resample"]]["Accuracy"]

# summary() on this object only reports length/class/mode (see output).
summary(Adult_TDA_KDE_5.60.5_n3_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
# Variable-importance plot (disabled):
# vip(Adult_TDA_KDE_5.60.5_n3_SvmFit0, 25) + ggtitle("Adult_TDA_KDE_5.60.5_n3_SvmFit TDA-Assisted Svm")

# Score the held-out test data with the node-3 SVM.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n3_SvmFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the true test labels and print the
# resulting statistics.
# NOTE(review): the printed output reports ' <=50K' as the 'Positive' class,
# so Precision/Recall/F1 describe that class; pass `positive =` if the
# ' >50K' class is the one of interest.
ad_tda_kde_5.60.5_n3_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n3_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7036  1081
##      >50K     380  1271
##                                           
##                Accuracy : 0.8504          
##                  95% CI : (0.8432, 0.8574)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5446          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9488          
##             Specificity : 0.5404          
##          Pos Pred Value : 0.8668          
##          Neg Pred Value : 0.7698          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7203          
##    Detection Prevalence : 0.8310          
##       Balanced Accuracy : 0.7446          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): prints the same confusion matrix a second time; the object is
# unchanged since it was created, so this output duplicates the previous one.
ad_tda_kde_5.60.5_n3_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7036  1081
##      >50K     380  1271
##                                           
##                Accuracy : 0.8504          
##                  95% CI : (0.8432, 0.8574)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5446          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9488          
##             Specificity : 0.5404          
##          Pos Pred Value : 0.8668          
##          Neg Pred Value : 0.7698          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7203          
##    Detection Prevalence : 0.8310          
##       Balanced Accuracy : 0.7446          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics of the node-3 confusion matrix.
ad_tda_kde_5.60.5_n3_svm_cf0[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.504300e-01   5.445652e-01   8.432017e-01   8.574486e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  1.574372e-109   6.450309e-75
# Store the test accuracy (first entry of `overall`) as a named numeric.
ad_tda_kde_5.60.5_n3_svm_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n3_svm_cf0[["overall"]]["Accuracy"]
# Per-class statistics.
ad_tda_kde_5.60.5_n3_svm_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9487594            0.5403912            0.8668227 
##       Neg Pred Value            Precision               Recall 
##            0.7698365            0.8668227            0.9487594 
##                   F1           Prevalence       Detection Rate 
##            0.9059422            0.7592138            0.7203112 
## Detection Prevalence    Balanced Accuracy 
##            0.8309787            0.7445753
# Entries 5 to 7 of `byClass`, selected by name: Precision, Recall, F1.
ad_tda_kde_5.60.5_n3_svm_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n3_svm_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers

### 3-fold diff

# Per-fold accuracy difference: `ad_svm_fit_re` minus the node-3 TDA-assisted
# SVM, so positive values mean the first operand scored higher.
# NOTE(review): rows are paired by position, not by fold label -- verify both
# data frames list the folds in the same order.
diff_tda_kde_5.60.5_svm_n3_3_fold <-
  ad_svm_fit_re - ad_tda_kde_5.60.5_n3_svm_fit_re
diff_tda_kde_5.60.5_svm_n3_3_fold
##     Accuracy
## 1 0.02508685
## 2 0.01273324
## 3 0.01640533
## Bayesian Tests 3-fold diff

# Bayesian sign test on the three fold-wise differences.
# The bounds -0.01 / 0.01 are presumably the ROPE limits (the result carries a
# `probRope` element) -- confirm against the test implementation.

bst_tda_kde_5.60.5_svm.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_svm_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_svm.n3_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds of "left" against "right" from the sign test (probLeft / probRight).

bst_tda_kde_5.60.5_svm.n3_3_fold_odds.left <-
  bst_tda_kde_5.60.5_svm.n3_3_fold[["probLeft"]] /
  bst_tda_kde_5.60.5_svm.n3_3_fold[["probRight"]]
bst_tda_kde_5.60.5_svm.n3_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the fold-wise differences (same bounds as the
# sign test).
# NOTE(review): the reported values look sampling-based; if so, set a seed
# beforehand so the numbers are reproducible -- confirm.

bsr_tda_kde_5.60.5_svm.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_svm_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_svm.n3_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.08963333
## 
## $winRight
## [1] 0.9103667
# Bayesian correlated t-test on the same differences.
# NOTE(review): if the second argument (0.1) is the fold correlation rho,
# the usual choice is the test fraction 1/k, i.e. 1/3 for 3-fold CV -- confirm.

bct_tda_kde_5.60.5_svm.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_svm_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_svm.n3_3_fold
## $left
## [1] 0.01097393
## 
## $rope
## [1] 0.08723392
## 
## $right
## [1] 0.9017921
# ROPE plot of the fold-wise differences for the interval [-0.01, 0.01].
plot(
  rope(diff_tda_kde_5.60.5_svm_n3_3_fold, c(-0.01, 0.01))
)

# Bayes factor (disabled):
# bf_tda_kde_5.60.5_svm.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_svm_n3_3_fold))
# bf_tda_kde_5.60.5_svm.n3_3_fold

# One-sample t-test of the differences against a mean of zero (df = 2 with
# three folds, see output).
t.test(
  as.matrix(diff_tda_kde_5.60.5_svm_n3_3_fold)
)
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.60.5_svm_n3_3_fold)
## t = 4.935, df = 2, p-value = 0.03869
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.002316204 0.033834075
## sample estimates:
##  mean of x 
## 0.01807514
### Test set diff
# Single test-set accuracy difference: `svm_cf_ov_acc` minus the node-3
# TDA-assisted SVM accuracy (negative means the node-3 model scored higher).
diff_tda_kde_5.60.5_svm.n3_test <-
  svm_cf_ov_acc - ad_tda_kde_5.60.5_n3_svm_cf0_ov_acc
diff_tda_kde_5.60.5_svm.n3_test
##   Accuracy 
## -0.0234439
## Bayesian Tests Test set diff

# Bayesian sign test on the test-set difference.
# NOTE(review): the input is a single value, so the result (0.5 / 0.5 / 0)
# carries very little information -- interpret with care.

bst_tda_kde_5.60.5_svm.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_svm.n3_test),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_svm.n3_test
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds of "left" against "right" from the sign test (probLeft / probRight).
# probRight is 0 here, so the ratio evaluates to Inf.

bst_tda_kde_5.60.5_svm.n3_test_odds.left <-
  bst_tda_kde_5.60.5_svm.n3_test[["probLeft"]] /
  bst_tda_kde_5.60.5_svm.n3_test[["probRight"]]
bst_tda_kde_5.60.5_svm.n3_test_odds.left
## [1] Inf
# Bayesian signed-rank test on the single test-set difference.

bsr_tda_kde_5.60.5_svm.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_svm.n3_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_svm.n3_test
## $winLeft
## [1] 0.8407333
## 
## $winRope
## [1] 0.1592667
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the single test-set difference.
# NOTE(review): with one observation this returns NA for left/rope/right
# (see output); the call is kept only for symmetry with the 3-fold section.

bct_tda_kde_5.60.5_svm.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_svm.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_svm.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_svm.n3_test))

#BayesFactor
#bf_tda_kde_5.60.5_svm.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_svm.n3_test)) #bf_tda_kde_5.60.5_svm.n3_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_svm.n3_test))

##Node4

# Fit a radial-basis SVM (caret method "svmRadial") on the n4 TDA/KDE
# training subset. The resampling scheme (fitControl) and tuning grid
# (svmGrid) are defined outside this section. Predictors are centred and
# scaled, and the (sigma, C) pair is selected on Accuracy.
# NOTE(review): `Importance` is not a named argument of train(), so it is
# forwarded through `...` to the underlying fitting function. It looks like
# a carry-over from a random-forest call -- confirm whether it is needed.
Adult_TDA_KDE_5.60.5_n4_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n4.vec,
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted model summary: resampling results for every sigma/C
# combination and the tuning values that were selected.
Adult_TDA_KDE_5.60.5_n4_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 11795 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 7863, 7864, 7863 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa      
##    0.1   0.25  0.8176347  0.299483643
##    0.1   0.50  0.8269606  0.376307547
##    0.1   0.75  0.8315389  0.409558299
##    0.1   1.00  0.8331498  0.425270442
##    0.1   1.25  0.8327257  0.431663114
##    1.0   0.25  0.7994916  0.127471144
##    1.0   0.50  0.8068670  0.199677506
##    1.0   0.75  0.8096650  0.245392404
##    1.0   1.00  0.8133952  0.281703866
##    1.0   1.25  0.8136496  0.298986548
##   10.0   0.25  0.7861806  0.001075727
##   10.0   0.50  0.7864350  0.009224312
##   10.0   0.75  0.7880459  0.035149825
##   10.0   1.00  0.7905895  0.071308500
##   10.0   1.25  0.7916916  0.096655028
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.
# Per-fold Accuracy and Kappa for the selected tuning values.
Adult_TDA_KDE_5.60.5_n4_SvmFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8344354 0.4327856    Fold1
## 2 0.8351564 0.4270883    Fold2
## 3 0.8298576 0.4159374    Fold3
# Keep only the per-fold Accuracy column of the resampling results, as a
# one-column data frame, for the fold-wise comparison further down.
ad_tda_kde_5.60.5_n4_svm_fit_re <-
  Adult_TDA_KDE_5.60.5_n4_SvmFit0$resample["Accuracy"]

summary(Adult_TDA_KDE_5.60.5_n4_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
#vip(Adult_TDA_KDE_5.60.5_n4_SvmFit0,25) + ggtitle("Adult_TDA_KDE_5.60.5_n_SvmFit TDA-Assited Svm")

# Score the held-out test rows with the n4 SVM fitted above.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n4_SvmFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of the predictions against the true test labels.
ad_tda_kde_5.60.5_n4_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n4_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7136  1448
##      >50K     280   904
##                                           
##                Accuracy : 0.8231          
##                  95% CI : (0.8154, 0.8306)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.4174          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9622          
##             Specificity : 0.3844          
##          Pos Pred Value : 0.8313          
##          Neg Pred Value : 0.7635          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7305          
##    Detection Prevalence : 0.8788          
##       Balanced Accuracy : 0.6733          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): this object was already printed right after it was created;
# this second print produces identical output and could be dropped.
ad_tda_kde_5.60.5_n4_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7136  1448
##      >50K     280   904
##                                           
##                Accuracy : 0.8231          
##                  95% CI : (0.8154, 0.8306)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.4174          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9622          
##             Specificity : 0.3844          
##          Pos Pred Value : 0.8313          
##          Neg Pred Value : 0.7635          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7305          
##    Detection Prevalence : 0.8788          
##       Balanced Accuracy : 0.6733          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall confusion-matrix statistics as a named numeric vector.
ad_tda_kde_5.60.5_n4_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.230958e-01   4.173615e-01   8.153814e-01   8.306171e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   6.147212e-53  2.054904e-173
# Store the overall test-set accuracy (first entry of $overall, a named
# length-1 vector), then show the per-class statistics.
ad_tda_kde_5.60.5_n4_svm_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n4_svm_cf0$overall["Accuracy"]
ad_tda_kde_5.60.5_n4_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9622438            0.3843537            0.8313141 
##       Neg Pred Value            Precision               Recall 
##            0.7635135            0.8313141            0.9622438 
##                   F1           Prevalence       Detection Rate 
##            0.8920000            0.7592138            0.7305487 
## Detection Prevalence    Balanced Accuracy 
##            0.8787879            0.6732988
# Precision, Recall and F1 (entries 5 to 7 of $byClass), selected by name.
ad_tda_kde_5.60.5_n4_svm_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n4_svm_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-TDA-assisted SVM vs. TDA-assisted SVM classifiers

### 3-fold difference (n4)

# Fold-by-fold accuracy difference: ad_svm_fit_re (presumably the non-TDA
# SVM resamples -- confirm) minus the n4 TDA-assisted resamples.
diff_tda_kde_5.60.5_svm_n4_3_fold <-
  ad_svm_fit_re - ad_tda_kde_5.60.5_n4_svm_fit_re
diff_tda_kde_5.60.5_svm_n4_3_fold
##       Accuracy
## 1 -0.010162663
## 2 -0.012834785
## 3 -0.006351393
## Bayesian tests on the 3-fold difference

# Bayesian sign test on the three per-fold differences; -0.01 / 0.01 are
# presumably the ROPE limits (confirm against the function definition).
bst_tda_kde_5.60.5_svm.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_svm_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_svm.n4_3_fold
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test.
# When probRight is 0 this ratio is Inf (or NaN if probLeft is 0 as well).
bst_tda_kde_5.60.5_svm.n4_3_fold_odds.left <-
  bst_tda_kde_5.60.5_svm.n4_3_fold[["probLeft"]] /
  bst_tda_kde_5.60.5_svm.n4_3_fold[["probRight"]]
bst_tda_kde_5.60.5_svm.n4_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the three per-fold differences; -0.01 / 0.01
# are presumably the ROPE limits (confirm against the function definition).
bsr_tda_kde_5.60.5_svm.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_svm_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_svm.n4_3_fold
## $winLeft
## [1] 0.3290667
## 
## $winRope
## [1] 0.6709333
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the three per-fold differences.
# NOTE(review): 0.1 is presumably the correlation (rho) argument. For k-fold
# cross-validation rho is commonly set to 1/k, and the resampling here is
# 3-fold -- confirm that 0.1 is intended.
bct_tda_kde_5.60.5_svm.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_svm_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_svm.n4_3_fold
## $left
## [1] 0.46476
## 
## $rope
## [1] 0.5293185
## 
## $right
## [1] 0.005921444
# ROPE plot for the n4 3-fold differences, using [-0.01, 0.01] as the region
# of practical equivalence.
plot(
  rope(diff_tda_kde_5.60.5_svm_n4_3_fold, c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.60.5_svm.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_svm_n4_3_fold))
#bf_tda_kde_5.60.5_svm.n4_3_fold

# One-sample t-test of the n4 3-fold differences (two-sided, null mean 0,
# as reported in the recorded output).
t.test(as.matrix(diff_tda_kde_5.60.5_svm_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.60.5_svm_n4_3_fold)
## t = -5.2004, df = 2, p-value = 0.03504
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.01787709 -0.00168880
## sample estimates:
##    mean of x 
## -0.009782947
### Test-set difference (n4)
# svm_cf_ov_acc (presumably the non-TDA SVM test accuracy -- confirm) minus
# the n4 TDA-assisted test accuracy. The result is a single named value.
diff_tda_kde_5.60.5_svm.n4_test <-
  svm_cf_ov_acc - ad_tda_kde_5.60.5_n4_svm_cf0_ov_acc
diff_tda_kde_5.60.5_svm.n4_test
##    Accuracy 
## 0.003890254
## Bayesian tests on the test-set difference

# Bayesian sign test. The trailing -0.01 / 0.01 are presumably the ROPE
# limits (confirm against the BayesianSignTest definition).
# NOTE(review): the input is a single test-set difference, so the reported
# probabilities rest on one observation.
bst_tda_kde_5.60.5_svm.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_svm.n4_test),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_svm.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test.
# When probRight is 0 this ratio is Inf, or NaN if probLeft is 0 as well
# (the recorded result here is NaN).
bst_tda_kde_5.60.5_svm.n4_test_odds.left <-
  bst_tda_kde_5.60.5_svm.n4_test[["probLeft"]] /
  bst_tda_kde_5.60.5_svm.n4_test[["probRight"]]
bst_tda_kde_5.60.5_svm.n4_test_odds.left
## [1] NaN
# Bayesian signed-rank test on the single test-set difference; -0.01 / 0.01
# are presumably the ROPE limits (confirm against the function definition).
bsr_tda_kde_5.60.5_svm.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_svm.n4_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_svm.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the single test-set difference.
# The recorded result is NA for left/rope/right, presumably because one
# observation gives no variance estimate -- confirm.
# NOTE(review): 0.1 is presumably the correlation (rho) argument; verify
# against the correlatedBayesianTtest definition.
bct_tda_kde_5.60.5_svm.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_svm.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_svm.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_svm.n4_test))

#BayesFactor
#bf_tda_kde_5.60.5_svm.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_svm.n4_test)) #bf_tda_kde_5.60.5_svm.n4_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_svm.n4_test))

##Node5

# Fit the node-5 TDA-assisted SVM with a radial basis kernel through train():
#   - response: adult_df1 coerced to a factor; predictors: all other columns
#   - resampling scheme taken from `fitControl`
#   - candidate sigma / C values taken from `svmGrid`
#   - predictors are centered and scaled before fitting
#   - the winning parameter combination is chosen on Accuracy
# `Importance` is passed on through train()'s extra arguments. It is written
# as TRUE rather than T because T is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved constant.
# NOTE(review): the section heading and object name say node 5, but the
# training data is `tda.m_kde_adult_5.60.5.n3.vec` -- confirm this is the
# intended node data.
# NOTE(review): the recorded run warns about zero-variance predictors on every
# resample; adding "zv" to `preProc` would drop them -- confirm before changing
# since it alters the fitted model.
Adult_TDA_KDE_5.60.5_n5_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n3.vec,
  Importance = TRUE,
  method = 'svmRadial',
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProc = c('center', 'scale'),
  metric = 'Accuracy'
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted object: resampled Accuracy / Kappa for every sigma-C pair
# and the combination selected for the final model.
Adult_TDA_KDE_5.60.5_n5_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 13266 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8844, 8844, 8844 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa      
##    0.1   0.25  0.7899141  0.374262485
##    0.1   0.50  0.8003166  0.428756800
##    0.1   0.75  0.8029549  0.446138492
##    0.1   1.00  0.8032564  0.454106944
##    0.1   1.25  0.8034826  0.460489464
##    1.0   0.25  0.7546359  0.163067046
##    1.0   0.50  0.7671491  0.244132494
##    1.0   0.75  0.7748379  0.291564920
##    1.0   1.00  0.7780793  0.319779932
##    1.0   1.25  0.7801146  0.338322308
##   10.0   0.25  0.7298357  0.000919086
##   10.0   0.50  0.7327001  0.025480097
##   10.0   0.75  0.7363938  0.055986804
##   10.0   1.00  0.7418212  0.097166193
##   10.0   1.25  0.7434796  0.118338862
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.25.
# Per-fold Accuracy and Kappa of the selected model. In the recorded output
# the rows are not in fold order (Fold1, Fold3, Fold2).
Adult_TDA_KDE_5.60.5_n5_SvmFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8012212 0.4547974    Fold1
## 2 0.8043872 0.4637866    Fold3
## 3 0.8048394 0.4628844    Fold2
# Keep only the per-fold Accuracy column (the first column of the resample
# table) for the fold-wise comparison further down.
ad_tda_kde_5.60.5_n5_svm_fit_re <-
  Adult_TDA_KDE_5.60.5_n5_SvmFit0$resample["Accuracy"]

# Summary of the fitted object
summary(Adult_TDA_KDE_5.60.5_n5_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
#vip(Adult_TDA_KDE_5.60.5_n5_SvmFit0,25) + ggtitle("Adult_TDA_KDE_5.60.5_n5_SvmFit TDA-Assited Svm")

# Predict the test-set classes with the node-5 TDA-assisted SVM fitted above
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n5_SvmFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of the predictions against the observed test labels, used
# to assess performance on the test data
ad_tda_kde_5.60.5_n5_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n5_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7014  1035
##      >50K     402  1317
##                                           
##                Accuracy : 0.8529          
##                  95% CI : (0.8457, 0.8599)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5569          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9458          
##             Specificity : 0.5599          
##          Pos Pred Value : 0.8714          
##          Neg Pred Value : 0.7661          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7181          
##    Detection Prevalence : 0.8240          
##       Balanced Accuracy : 0.7529          
##                                           
##        'Positive' Class :  <=50K          
## 
# Second print of the same confusion matrix (identical to the output above).
ad_tda_kde_5.60.5_n5_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7014  1035
##      >50K     402  1317
##                                           
##                Accuracy : 0.8529          
##                  95% CI : (0.8457, 0.8599)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5569          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9458          
##             Specificity : 0.5599          
##          Pos Pred Value : 0.8714          
##          Neg Pred Value : 0.7661          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7181          
##    Detection Prevalence : 0.8240          
##       Balanced Accuracy : 0.7529          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the confusion matrix (Accuracy, Kappa, accuracy
# bounds, null accuracy and the two p-values).
ad_tda_kde_5.60.5_n5_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.528870e-01   5.569159e-01   8.457061e-01   8.598567e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  8.774525e-116   2.093335e-62
# Store the overall test accuracy (first entry of $overall, named Accuracy)
ad_tda_kde_5.60.5_n5_svm_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n5_svm_cf0$overall["Accuracy"]
# Per-class statistics of the same confusion matrix
ad_tda_kde_5.60.5_n5_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9457929            0.5599490            0.8714126 
##       Neg Pred Value            Precision               Recall 
##            0.7661431            0.8714126            0.9457929 
##                   F1           Prevalence       Detection Rate 
##            0.9070805            0.7592138            0.7180590 
## Detection Prevalence    Balanced Accuracy 
##            0.8240172            0.7528709
# Keep Precision, Recall and F1 (entries 5 to 7 of $byClass)
ad_tda_kde_5.60.5_n5_svm_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n5_svm_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers

### 3-fold diff
# Row-wise difference of the per-fold accuracies: `ad_svm_fit_re` minus the
# node-5 TDA-assisted SVM.
# NOTE(review): the subtraction pairs rows by position, and the node-5
# resample table is printed in the order Fold1, Fold3, Fold2 -- confirm that
# `ad_svm_fit_re` uses the same row order.
diff_tda_kde_5.60.5_svm_n5_3_fold <-
  ad_svm_fit_re - ad_tda_kde_5.60.5_n5_svm_fit_re
diff_tda_kde_5.60.5_svm_n5_3_fold
##     Accuracy
## 1 0.02305157
## 2 0.01793451
## 3 0.01866675
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# Applied to the three per-fold differences. -0.01 and 0.01 are passed as the
# last two arguments (presumably the ROPE bounds -- confirm against the
# function's definition).
bst_tda_kde_5.60.5_svm.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_svm_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_svm.n5_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test
# Ratio of probLeft to probRight from the sign-test result.
bst_tda_kde_5.60.5_svm.n5_3_fold_odds.left <-
  bst_tda_kde_5.60.5_svm.n5_3_fold[["probLeft"]] /
  bst_tda_kde_5.60.5_svm.n5_3_fold[["probRight"]]
bst_tda_kde_5.60.5_svm.n5_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
# Applied to the three per-fold differences with the same -0.01 / 0.01
# arguments as the sign test.
bsr_tda_kde_5.60.5_svm.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_svm_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_svm.n5_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.09083333
## 
## $winRight
## [1] 0.9091667
# Bayesian Correlated Test
# Applied to the three per-fold differences; the result has left, rope and
# right entries.
# NOTE(review): 0.1 is presumably the correlation argument -- confirm against
# the function's definition.
bct_tda_kde_5.60.5_svm.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_svm_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_svm.n5_3_fold
## $left
## [1] 0.00189468
## 
## $rope
## [1] 0.01466324
## 
## $right
## [1] 0.9834421
# Rope Plot
# Plot the ROPE summary of the node-5 3-fold accuracy differences, using the
# interval [-0.01, 0.01] as the ROPE range.
plot(
  rope(diff_tda_kde_5.60.5_svm_n5_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.60.5_svm.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_svm_n5_3_fold))
#bf_tda_kde_5.60.5_svm.n5_3_fold

# t_test
# One-sample t-test of the node-5 3-fold accuracy differences against a mean
# of 0 (the default null value, written out explicitly here).
t.test(as.matrix(diff_tda_kde_5.60.5_svm_n5_3_fold), mu = 0)
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.60.5_svm_n5_3_fold)
## t = 12.446, df = 2, p-value = 0.006394
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.01300996 0.02675859
## sample estimates:
##  mean of x 
## 0.01988428
# Tuning grid for the nnet model: every combination of the listed `size` and
# `decay` values (4 x 3 = 12 candidates).
nn1Grid <- expand.grid(size = c(2, 3, 5, 7), decay = c(0.3, 0.5, 0.7))

# Neural Network
# Fit an nnet classifier through train() on `adult.one_hot_df4Train`:
#   - response: adult_df1 coerced to a factor; predictors: all other columns
#   - resampling scheme taken from `fitControl`
#   - candidates taken from `nn1Grid`; the winner is chosen on Accuracy
# `Importance` is passed on through train()'s extra arguments. It is written
# as TRUE rather than T because T is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved constant.
# NOTE(review): many fits in the recorded log end with "stopped after 100
# iterations" -- consider whether a larger iteration limit is wanted.
adultNn1Fit <- train(
  as.factor(adult_df1) ~ .,
  data = adult.one_hot_df4Train,
  Importance = TRUE,
  method = 'nnet',
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = 'Accuracy'
)
## # weights:  221
## initial  value 14914.247485 
## iter  10 value 8387.827451
## iter  10 value 8387.827445
## final  value 8387.827314 
## converged
## # weights:  331
## initial  value 8934.347194 
## iter  10 value 7808.621355
## iter  20 value 7695.138728
## iter  30 value 7694.826011
## iter  40 value 7694.718736
## iter  50 value 7690.104046
## iter  60 value 7680.254181
## iter  70 value 7659.971283
## iter  80 value 7653.768876
## iter  90 value 7645.410266
## iter 100 value 7525.511899
## final  value 7525.511899 
## stopped after 100 iterations
## # weights:  551
## initial  value 15270.611564 
## iter  10 value 8317.553863
## iter  20 value 8140.854287
## iter  30 value 7730.047655
## iter  40 value 7684.776575
## iter  50 value 7666.542234
## iter  60 value 7643.445407
## iter  70 value 7581.690526
## iter  80 value 7543.649568
## iter  90 value 7505.937117
## iter 100 value 7363.796783
## final  value 7363.796783 
## stopped after 100 iterations
## # weights:  771
## initial  value 16294.703309 
## iter  10 value 7786.612502
## iter  20 value 7685.449279
## iter  30 value 7671.905931
## iter  40 value 7660.658740
## iter  50 value 7646.353470
## iter  60 value 7631.106499
## iter  70 value 7592.530816
## iter  80 value 7542.472357
## iter  90 value 7504.258442
## iter 100 value 7488.577910
## final  value 7488.577910 
## stopped after 100 iterations
## # weights:  221
## initial  value 15499.745841 
## iter  10 value 8388.053418
## iter  20 value 8388.042862
## iter  20 value 8388.042799
## iter  20 value 8388.042793
## final  value 8388.042793 
## converged
## # weights:  331
## initial  value 12875.025503 
## iter  10 value 8267.489254
## iter  20 value 7708.902559
## iter  30 value 7707.959692
## iter  40 value 7683.777898
## iter  50 value 7571.977732
## iter  60 value 7477.751846
## iter  70 value 7471.083962
## iter  80 value 7397.102950
## iter  90 value 7372.392331
## iter 100 value 7341.382346
## final  value 7341.382346 
## stopped after 100 iterations
## # weights:  551
## initial  value 10967.055006 
## iter  10 value 8333.175880
## iter  20 value 8323.905758
## iter  30 value 8113.213696
## iter  40 value 7573.865713
## iter  50 value 7543.210403
## iter  60 value 7516.513403
## iter  70 value 7054.955298
## iter  80 value 6544.396974
## iter  90 value 6244.004392
## iter 100 value 5864.693839
## final  value 5864.693839 
## stopped after 100 iterations
## # weights:  771
## initial  value 8557.405771 
## iter  10 value 7721.896197
## iter  20 value 7703.675682
## iter  30 value 7679.899357
## iter  40 value 7591.105270
## iter  50 value 7579.772570
## iter  60 value 7511.924843
## iter  70 value 7469.908575
## iter  80 value 7464.322207
## iter  90 value 7330.304185
## iter 100 value 6602.928255
## final  value 6602.928255 
## stopped after 100 iterations
## # weights:  221
## initial  value 10796.872751 
## iter  10 value 8386.359593
## iter  20 value 7752.958524
## iter  30 value 7722.945264
## iter  40 value 7663.040625
## iter  50 value 7621.125211
## iter  60 value 7607.043979
## iter  70 value 7582.395552
## iter  80 value 7400.172667
## iter  90 value 7215.413595
## iter 100 value 6615.114583
## final  value 6615.114583 
## stopped after 100 iterations
## # weights:  331
## initial  value 11567.665716 
## iter  10 value 8318.682666
## iter  20 value 7836.418097
## iter  30 value 7826.643549
## iter  40 value 7758.193283
## iter  50 value 7735.975032
## iter  60 value 7681.647350
## iter  70 value 7648.037579
## iter  80 value 7548.353306
## iter  90 value 7396.481260
## iter 100 value 6548.994585
## final  value 6548.994585 
## stopped after 100 iterations
## # weights:  551
## initial  value 8684.052730 
## iter  10 value 8101.316344
## iter  20 value 7719.153662
## iter  30 value 7705.102736
## iter  40 value 7328.030580
## iter  50 value 6936.281710
## iter  60 value 6904.159106
## iter  70 value 6503.108413
## iter  80 value 6083.116519
## iter  90 value 5728.454514
## iter 100 value 5540.287574
## final  value 5540.287574 
## stopped after 100 iterations
## # weights:  771
## initial  value 16698.339714 
## iter  10 value 7809.493522
## iter  20 value 7542.760966
## iter  30 value 7529.377824
## iter  40 value 7460.839476
## iter  50 value 7189.834582
## iter  60 value 6990.211510
## iter  70 value 6981.244136
## iter  80 value 6691.671055
## iter  90 value 5748.327853
## iter 100 value 5307.371888
## final  value 5307.371888 
## stopped after 100 iterations
## # weights:  221
## initial  value 9007.370545 
## iter  10 value 8387.893793
## iter  20 value 8370.412757
## iter  30 value 8355.144558
## iter  40 value 8062.565654
## iter  50 value 7772.953460
## iter  60 value 7374.245172
## iter  70 value 6661.141222
## iter  80 value 6248.936301
## iter  90 value 6128.051239
## iter 100 value 5992.725327
## final  value 5992.725327 
## stopped after 100 iterations
## # weights:  331
## initial  value 10951.458356 
## iter  10 value 8332.325545
## iter  20 value 7823.972465
## iter  30 value 7780.903226
## iter  40 value 7745.630955
## iter  50 value 7739.642084
## iter  60 value 7738.543237
## iter  70 value 7728.614195
## iter  80 value 7657.519923
## iter  90 value 7538.164705
## iter 100 value 7517.264919
## final  value 7517.264919 
## stopped after 100 iterations
## # weights:  551
## initial  value 9491.348203 
## iter  10 value 8233.914449
## iter  20 value 7970.146725
## iter  30 value 7800.374269
## iter  40 value 7796.272701
## iter  50 value 7787.750936
## iter  60 value 7760.391994
## iter  70 value 7616.086201
## iter  80 value 7455.184358
## iter  90 value 7409.753912
## iter 100 value 7343.103830
## final  value 7343.103830 
## stopped after 100 iterations
## # weights:  771
## initial  value 8435.606022 
## iter  10 value 8314.294684
## iter  20 value 7834.906866
## iter  30 value 7617.521283
## iter  40 value 7601.464600
## iter  50 value 7596.253449
## iter  60 value 7456.076859
## iter  70 value 7431.776587
## iter  80 value 7160.344816
## iter  90 value 6908.360433
## iter 100 value 6139.029521
## final  value 6139.029521 
## stopped after 100 iterations
## # weights:  221
## initial  value 12984.644054 
## iter  10 value 8565.375631
## iter  20 value 7775.349984
## iter  30 value 7774.925575
## iter  40 value 7756.925989
## iter  50 value 7608.131861
## iter  60 value 7550.067860
## iter  70 value 7419.373239
## iter  80 value 7071.728904
## iter  90 value 6310.673260
## iter 100 value 5698.521509
## final  value 5698.521509 
## stopped after 100 iterations
## # weights:  331
## initial  value 13843.624590 
## iter  10 value 8288.530559
## iter  20 value 7665.333543
## iter  30 value 7581.870900
## iter  30 value 7581.870885
## final  value 7581.870885 
## converged
## # weights:  551
## initial  value 13089.968565 
## iter  10 value 8044.819335
## iter  20 value 7845.610189
## iter  30 value 7681.548660
## iter  40 value 7618.996656
## iter  50 value 7572.067203
## iter  60 value 7540.338331
## iter  70 value 7522.611949
## iter  80 value 7311.586164
## iter  90 value 6996.529531
## iter 100 value 6655.909450
## final  value 6655.909450 
## stopped after 100 iterations
## # weights:  771
## initial  value 9765.548507 
## iter  10 value 8212.363355
## iter  20 value 7824.273923
## iter  30 value 7772.815641
## iter  40 value 7751.053394
## iter  50 value 7732.036973
## iter  60 value 7668.934206
## iter  70 value 7646.818132
## iter  80 value 7601.303018
## iter  90 value 7493.928824
## iter 100 value 7418.581645
## final  value 7418.581645 
## stopped after 100 iterations
## # weights:  221
## initial  value 8849.572798 
## iter  10 value 7970.702123
## iter  20 value 7816.401644
## iter  30 value 7812.858798
## iter  40 value 7757.905728
## iter  50 value 7577.625843
## iter  60 value 7520.045779
## iter  70 value 7456.798749
## iter  80 value 7313.957474
## iter  90 value 7164.603052
## iter 100 value 6121.989153
## final  value 6121.989153 
## stopped after 100 iterations
## # weights:  331
## initial  value 9379.607285 
## iter  10 value 8388.421617
## iter  20 value 8369.172128
## iter  30 value 8332.031073
## iter  40 value 8310.905049
## iter  50 value 7822.778864
## iter  60 value 7471.508794
## iter  70 value 6777.512118
## iter  80 value 6175.471431
## iter  90 value 5609.965802
## iter 100 value 5282.472146
## final  value 5282.472146 
## stopped after 100 iterations
## # weights:  551
## initial  value 14172.587821 
## iter  10 value 8161.102118
## iter  20 value 7841.393426
## iter  30 value 7752.389698
## iter  40 value 7748.583088
## iter  50 value 7740.483911
## iter  60 value 7585.009885
## iter  70 value 7548.991549
## iter  80 value 7533.416049
## iter  90 value 7523.933185
## iter 100 value 7504.973765
## final  value 7504.973765 
## stopped after 100 iterations
## # weights:  771
## initial  value 8440.807063 
## iter  10 value 7775.610906
## iter  20 value 7762.108417
## iter  30 value 7747.050717
## iter  40 value 7742.501701
## iter  50 value 7731.745854
## iter  60 value 7619.866913
## iter  70 value 7569.581747
## iter  80 value 7563.850584
## iter  90 value 7465.587301
## iter 100 value 7417.631348
## final  value 7417.631348 
## stopped after 100 iterations
## # weights:  221
## initial  value 11368.699999 
## final  value 8389.250891 
## converged
## # weights:  331
## initial  value 14067.553992 
## iter  10 value 8389.251061
## final  value 8389.250871 
## converged
## # weights:  551
## initial  value 12931.852167 
## iter  10 value 8048.585914
## iter  20 value 8033.248236
## final  value 8032.929443 
## converged
## # weights:  771
## initial  value 9359.205323 
## iter  10 value 8341.182609
## iter  20 value 7764.602072
## iter  30 value 7656.621439
## iter  40 value 7619.348444
## iter  50 value 7488.368833
## iter  60 value 7334.702048
## iter  70 value 7046.359244
## iter  80 value 6330.604012
## iter  90 value 5579.032428
## iter 100 value 5532.368850
## final  value 5532.368850 
## stopped after 100 iterations
## # weights:  221
## initial  value 12741.268962 
## iter  10 value 8381.909271
## iter  20 value 8039.954343
## iter  30 value 7780.214751
## iter  40 value 7772.950439
## iter  50 value 7769.281228
## final  value 7766.136267 
## converged
## # weights:  331
## initial  value 8812.657497 
## iter  10 value 8243.470361
## iter  20 value 7861.344462
## iter  30 value 7770.690723
## iter  40 value 7768.443349
## iter  50 value 7762.604336
## iter  60 value 7756.711841
## iter  70 value 7686.251124
## iter  80 value 7603.987494
## iter  90 value 7595.520572
## iter 100 value 7485.892896
## final  value 7485.892896 
## stopped after 100 iterations
## # weights:  551
## initial  value 12214.039002 
## iter  10 value 8257.195400
## iter  20 value 7793.433615
## iter  30 value 7607.795668
## iter  40 value 7564.642816
## iter  50 value 7558.264906
## iter  60 value 7556.097129
## iter  70 value 7553.282434
## iter  80 value 7550.531224
## iter  90 value 7550.152798
## iter 100 value 7549.331992
## final  value 7549.331992 
## stopped after 100 iterations
## # weights:  771
## initial  value 9358.061901 
## iter  10 value 8133.967595
## iter  20 value 7735.799556
## iter  30 value 7525.124606
## iter  40 value 7504.565786
## iter  50 value 7489.430183
## iter  60 value 7470.148511
## iter  70 value 7451.607121
## iter  80 value 7418.243338
## iter  90 value 7402.781472
## iter 100 value 7391.955274
## final  value 7391.955274 
## stopped after 100 iterations
## # weights:  221
## initial  value 12787.513808 
## iter  10 value 8389.608822
## iter  20 value 7785.227973
## iter  30 value 7778.267470
## iter  40 value 7778.233084
## iter  50 value 7774.657702
## iter  50 value 7774.657691
## final  value 7774.657691 
## converged
## # weights:  331
## initial  value 12243.635329 
## iter  10 value 8419.227557
## iter  20 value 8389.265532
## iter  30 value 8361.967251
## iter  40 value 7784.554187
## iter  50 value 7761.896107
## iter  60 value 7743.986618
## iter  70 value 7587.812085
## iter  80 value 7548.866412
## iter  90 value 7391.411653
## iter 100 value 7129.129194
## final  value 7129.129194 
## stopped after 100 iterations
## # weights:  551
## initial  value 12141.619444 
## iter  10 value 8235.199570
## iter  20 value 7998.059485
## iter  30 value 7792.905576
## iter  40 value 7775.919897
## iter  50 value 7768.861347
## iter  60 value 7754.033756
## iter  70 value 7732.614733
## iter  80 value 7699.392212
## iter  90 value 7667.849845
## iter 100 value 7572.755801
## final  value 7572.755801 
## stopped after 100 iterations
## # weights:  771
## initial  value 15261.419720 
## iter  10 value 8364.038019
## iter  20 value 8125.621020
## iter  30 value 7808.378483
## iter  40 value 7628.274448
## iter  50 value 7594.937758
## iter  60 value 7586.909450
## iter  70 value 7576.374040
## iter  80 value 7571.920047
## iter  90 value 7446.012084
## iter 100 value 7349.163902
## final  value 7349.163902 
## stopped after 100 iterations
## # weights:  331
## initial  value 30004.133921 
## iter  10 value 11969.553227
## iter  20 value 11628.553351
## iter  30 value 11423.960902
## iter  40 value 11376.226663
## iter  50 value 11373.077231
## iter  60 value 11338.672213
## iter  70 value 11163.310233
## iter  80 value 11010.896927
## iter  90 value 10524.613644
## iter 100 value 10226.328138
## final  value 10226.328138 
## stopped after 100 iterations
# Test-set accuracy difference: `svm_cf_ov_acc` minus the accuracy stored for
# the TDA-KDE 5.60.5 node-5 SVM (`svm_cf_ov_acc` is presumably the non-TDA
# baseline -- confirm where it is defined). The result is printed afterwards.
diff_tda_kde_5.60.5_svm.n5_test <-
  svm_cf_ov_acc - ad_tda_kde_5.60.5_n5_svm_cf0_ov_acc
diff_tda_kde_5.60.5_svm.n5_test
##   Accuracy 
## -0.0259009
## Bayesian Tests Test set diff

# Bayesian sign test on the test-set accuracy difference.
# -0.01 and 0.01 are passed as the second and third arguments (presumably the
# lower/upper ROPE bounds -- confirm against the BayesianSignTest definition).
bst_tda_kde_5.60.5_svm.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_svm.n5_test),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_svm.n5_test
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test result:
# probLeft divided by probRight.
# NOTE(review): in the recorded run probRight is 0, so the ratio prints as Inf.
bst_tda_kde_5.60.5_svm.n5_test_odds.left <-
  bst_tda_kde_5.60.5_svm.n5_test[["probLeft"]] /
  bst_tda_kde_5.60.5_svm.n5_test[["probRight"]]
bst_tda_kde_5.60.5_svm.n5_test_odds.left
## [1] Inf
# Bayesian signed-rank test on the same test-set accuracy difference, called
# with the same -0.01 / 0.01 pair used for the sign test (presumably the ROPE
# bounds -- confirm against the BayesianSignedRank definition).
bsr_tda_kde_5.60.5_svm.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_svm.n5_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_svm.n5_test
## $winLeft
## [1] 0.84
## 
## $winRope
## [1] 0.16
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the test-set accuracy difference.
# Arguments after the data: 0.1 (presumably the correlation `rho` -- confirm),
# then -0.01 and 0.01 (presumably the ROPE bounds -- confirm).
# NOTE(review): the recorded result is NA for left/rope/right. The input printed
# earlier is a single accuracy difference, so the test presumably cannot
# estimate a variance from it -- consider feeding fold-wise differences instead.
bct_tda_kde_5.60.5_svm.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_svm.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_svm.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_svm.n5_test))

#BayesFactor
#bf_tda_kde_5.60.5_svm.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_svm.n5_test))
#bf_tda_kde_5.60.5_svm.n5_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_svm.n5_test))


# Non-TDA-assisted baseline: print the fitted neural-network model object
# (resampling results per size/decay combination and the selected values).

adultNn1Fit
## Neural Network 
## 
## 22793 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 15195, 15195, 15196 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa     
##   2     0.3    0.7722979  0.07902348
##   2     0.5    0.7902428  0.26366959
##   2     0.7    0.8062120  0.40187788
##   3     0.3    0.7841870  0.15337122
##   3     0.5    0.7984471  0.28576293
##   3     0.7    0.8198126  0.48834476
##   5     0.3    0.7905932  0.22345924
##   5     0.5    0.8086252  0.31627472
##   5     0.7    0.7958584  0.26765852
##   7     0.3    0.8067837  0.33051335
##   7     0.5    0.7998950  0.26059533
##   7     0.7    0.7989735  0.25366012
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 3 and decay = 0.7.
# Per-fold resampling metrics of the selected baseline model.
adultNn1Fit$resample
##    Accuracy     Kappa Resample
## 1 0.8079505 0.4185547    Fold3
## 2 0.8500921 0.5902927    Fold2
## 3 0.8013951 0.4561869    Fold1
# Keep the Accuracy column (the first column of the resample table) as a
# one-column data frame.
ad_nn1_fit_re <- adultNn1Fit$resample["Accuracy"]

# Print the weights of the final network.
summary(adultNn1Fit)
## a 108-3-1 network with 331 weights
## options were - entropy fitting  decay=0.7
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##    -2.07    -0.04    -0.47    -1.02    -1.34     0.01    -0.01     1.60 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##    -0.03    -0.74    -0.07     0.00    -0.48     0.37    -0.02    -0.14 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.15     0.37    -0.40    -0.93    -1.26    -5.00     1.95     0.87 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     4.25    -0.50     1.23    -2.50     0.45    -1.88     0.28     1.58 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##    -1.04     0.07    -0.76    -0.31    -0.45    -0.10     0.14    -1.23 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.91    -0.61    -0.71    -0.03    -1.45    -0.26     2.56     0.11 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##    -0.84    -0.55     0.43    -2.41    -2.14     0.25    -2.01    -0.35 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     4.61    -1.14    -0.96     0.82    -0.12    -0.67     0.03    -2.09 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.00     0.00     0.05    -0.35    -0.57    -0.09    -0.24    -0.06 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.02    -0.19     0.02    -0.03     0.05     0.18    -0.12    -0.07 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##    -0.06     0.01     0.00     0.06    -0.17    -0.02     0.33     0.27 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##    -0.01    -0.14     0.03     0.50     0.09     0.03    -0.01     0.01 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.08    -0.84     0.05     0.11    -0.30     0.03    -0.25     0.07 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.01    -0.02    -0.45    -0.06     0.01 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##     0.00     0.01     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h3   i1->h3   i2->h3   i3->h3   i4->h3   i5->h3   i6->h3   i7->h3 
##     1.81     0.07     0.17    -0.60     0.17     0.00    -0.36    -0.36 
##   i8->h3   i9->h3  i10->h3  i11->h3  i12->h3  i13->h3  i14->h3  i15->h3 
##     2.05     0.74     0.00     0.00     0.64     0.56     0.01    -0.05 
##  i16->h3  i17->h3  i18->h3  i19->h3  i20->h3  i21->h3  i22->h3  i23->h3 
##    -0.47     1.23     1.10    -0.69     0.55    -2.79     0.46    -0.52 
##  i24->h3  i25->h3  i26->h3  i27->h3  i28->h3  i29->h3  i30->h3  i31->h3 
##     0.50     0.15     0.14     0.96     0.01    -0.09     0.00    -1.25 
##  i32->h3  i33->h3  i34->h3  i35->h3  i36->h3  i37->h3  i38->h3  i39->h3 
##     0.12     1.80    -0.10     1.34     0.17    -1.53     0.00    -0.62 
##  i40->h3  i41->h3  i42->h3  i43->h3  i44->h3  i45->h3  i46->h3  i47->h3 
##    -0.40     2.08    -0.64    -0.75     1.54    -0.01    -0.38     0.99 
##  i48->h3  i49->h3  i50->h3  i51->h3  i52->h3  i53->h3  i54->h3  i55->h3 
##     0.68    -0.30     0.99    -0.85    -0.59     1.27     0.41     1.75 
##  i56->h3  i57->h3  i58->h3  i59->h3  i60->h3  i61->h3  i62->h3  i63->h3 
##    -0.17    -0.37     0.32    -0.32     0.14     2.04     1.36     0.45 
##  i64->h3  i65->h3  i66->h3  i67->h3  i68->h3  i69->h3  i70->h3  i71->h3 
##     0.00     0.00    -0.07    -0.01     0.10     0.01    -0.05     0.09 
##  i72->h3  i73->h3  i74->h3  i75->h3  i76->h3  i77->h3  i78->h3  i79->h3 
##     0.34     0.00     0.00     0.93     0.24    -0.04     0.02     0.45 
##  i80->h3  i81->h3  i82->h3  i83->h3  i84->h3  i85->h3  i86->h3  i87->h3 
##     0.01     0.00     0.00     0.00     0.00    -0.29     0.04     0.02 
##  i88->h3  i89->h3  i90->h3  i91->h3  i92->h3  i93->h3  i94->h3  i95->h3 
##     0.00    -0.16     0.01    -0.04     0.00    -0.51     0.11     0.00 
##  i96->h3  i97->h3  i98->h3  i99->h3 i100->h3 i101->h3 i102->h3 i103->h3 
##     0.00     0.01     0.03     0.06    -0.04    -0.06     0.05    -0.01 
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3 
##     0.00    -0.01     0.44     0.23    -0.19 
##  b->o h1->o h2->o h3->o 
##  1.26  1.28  1.27 -4.15
# Variable-importance plot of the baseline (non-TDA) network, limited to the
# 25 highest-ranked features, with a descriptive title.
vip(adultNn1Fit, num_features = 25L) + ggtitle("non-TDA-Assisted NN")

# Score the test data with the model fitted on the training data.
predictions <- predict(adultNn1Fit, newdata = adult.one_hot_df4Test)

# Confusion matrix of predicted versus observed labels on the test data,
# followed by its printed summary.
nn1_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
nn1_cf
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6956  1444
##      >50K     460   908
##                                           
##                Accuracy : 0.8051          
##                  95% CI : (0.7971, 0.8129)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.378           
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9380          
##             Specificity : 0.3861          
##          Pos Pred Value : 0.8281          
##          Neg Pred Value : 0.6637          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7121          
##    Detection Prevalence : 0.8600          
##       Balanced Accuracy : 0.6620          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the baseline confusion matrix.
nn1_cf$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.050778e-01   3.780235e-01   7.970803e-01   8.128930e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.086498e-27  2.213692e-112
# Overall test accuracy (first element of `overall`), kept as a named value.
nn1_cf_ov_acc <- nn1_cf$overall["Accuracy"]

# Per-class statistics of the baseline confusion matrix.
nn1_cf$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9379720            0.3860544            0.8280952 
##       Neg Pred Value            Precision               Recall 
##            0.6637427            0.8280952            0.9379720 
##                   F1           Prevalence       Detection Rate 
##            0.8796156            0.7592138            0.7121212 
## Detection Prevalence    Balanced Accuracy 
##            0.8599509            0.6620132
# Precision, Recall and F1 (elements 5 to 7 of `byClass`), selected by name.
nn1_cf_pre_rec_f1 <- nn1_cf$byClass[c("Precision", "Recall", "F1")]

## With TDA PCA filter: 5 intervals, 50% overlap, 5 bins
## NOTE(review): the object names encode "5.60.5", which suggests 60% overlap
## rather than the 50% stated here -- confirm against the mapper call.
## Node 1

# Neural Network 1: tune an nnet classifier for `adult_df1` on the node-1
# data, using the resampling control `fitControl` and the tuning grid
# `nn1Grid`, and selecting the model by Accuracy.
# NOTE(review): `Importance` is not a named argument of train(); it is passed
# on through `...` and presumably ignored by nnet -- confirm or remove.
Adult_TDA_PC_5.60.5_n1_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n1.vec,
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 4824.713668 
## iter  10 value 1492.873618
## iter  20 value 1480.412836
## iter  30 value 1392.296209
## iter  40 value 1219.014250
## iter  50 value 1095.672688
## iter  60 value 1039.633241
## iter  70 value 1013.658978
## iter  80 value 1009.479886
## iter  90 value 1008.325876
## iter 100 value 1008.289157
## final  value 1008.289157 
## stopped after 100 iterations
## # weights:  331
## initial  value 2097.492037 
## iter  10 value 1471.597256
## iter  20 value 1457.429380
## iter  30 value 1427.590351
## iter  40 value 1287.840868
## iter  50 value 1071.950320
## iter  60 value 1032.493835
## iter  70 value 1017.112612
## iter  80 value 1010.874037
## iter  90 value 1001.307134
## iter 100 value 998.421712
## final  value 998.421712 
## stopped after 100 iterations
## # weights:  551
## initial  value 3145.588679 
## iter  10 value 1466.903187
## iter  20 value 1428.358585
## iter  30 value 1320.702943
## iter  40 value 1198.302588
## iter  50 value 1085.634314
## iter  60 value 1066.962941
## iter  70 value 1027.297321
## iter  80 value 1014.696459
## iter  90 value 1014.536065
## iter 100 value 1013.101421
## final  value 1013.101421 
## stopped after 100 iterations
## # weights:  771
## initial  value 2956.220491 
## iter  10 value 1467.772897
## iter  20 value 1426.683725
## iter  30 value 1408.278015
## iter  40 value 1390.349667
## iter  50 value 1364.575860
## iter  60 value 1252.930283
## iter  70 value 1144.571149
## iter  80 value 1076.136885
## iter  90 value 1043.144819
## iter 100 value 1019.657711
## final  value 1019.657711 
## stopped after 100 iterations
## # weights:  221
## initial  value 2657.593141 
## iter  10 value 1472.079642
## final  value 1472.078881 
## converged
## # weights:  331
## initial  value 3466.925526 
## iter  10 value 1474.851869
## iter  20 value 1471.733710
## iter  30 value 1471.695002
## final  value 1471.694950 
## converged
## # weights:  551
## initial  value 3444.429762 
## iter  10 value 1995.433601
## iter  20 value 1480.007340
## iter  30 value 1465.879996
## iter  40 value 1414.249250
## iter  50 value 1409.559954
## iter  60 value 1407.939471
## iter  70 value 1402.098314
## iter  80 value 1397.209534
## iter  90 value 1369.548414
## iter 100 value 1344.980991
## final  value 1344.980991 
## stopped after 100 iterations
## # weights:  771
## initial  value 2497.128925 
## iter  10 value 1473.299176
## iter  20 value 1471.459449
## iter  30 value 1460.491732
## iter  40 value 1432.422786
## iter  50 value 1409.088279
## iter  60 value 1345.969201
## iter  70 value 1283.306298
## iter  80 value 1193.215457
## iter  90 value 1150.741027
## iter 100 value 1149.410801
## final  value 1149.410801 
## stopped after 100 iterations
## # weights:  221
## initial  value 3368.768004 
## iter  10 value 1479.536070
## iter  20 value 1448.650168
## iter  30 value 1398.488885
## iter  40 value 1382.544419
## iter  50 value 1332.849218
## iter  60 value 1165.153945
## iter  70 value 1082.990815
## iter  80 value 1056.947686
## iter  90 value 1051.644462
## iter 100 value 1044.748696
## final  value 1044.748696 
## stopped after 100 iterations
## # weights:  331
## initial  value 3684.101493 
## iter  10 value 1452.510135
## iter  20 value 1437.013993
## iter  30 value 1433.636025
## iter  40 value 1431.897675
## iter  50 value 1413.003762
## iter  60 value 1387.954217
## iter  70 value 1373.875016
## iter  80 value 1313.785145
## iter  90 value 1265.327991
## iter 100 value 1129.258050
## final  value 1129.258050 
## stopped after 100 iterations
## # weights:  551
## initial  value 2365.301084 
## iter  10 value 1474.325956
## iter  20 value 1443.563278
## iter  30 value 1442.059548
## iter  40 value 1433.692983
## iter  50 value 1424.342201
## iter  60 value 1395.656566
## iter  70 value 1390.070956
## iter  80 value 1385.076714
## iter  90 value 1364.487179
## iter 100 value 1233.308913
## final  value 1233.308913 
## stopped after 100 iterations
## # weights:  771
## initial  value 3426.458156 
## iter  10 value 1472.220984
## iter  20 value 1456.741820
## iter  30 value 1443.979227
## iter  40 value 1431.542531
## iter  50 value 1419.061699
## iter  60 value 1358.578517
## iter  70 value 1332.310274
## iter  80 value 1326.129798
## iter  90 value 1325.363576
## iter 100 value 1320.901745
## final  value 1320.901745 
## stopped after 100 iterations
## # weights:  221
## initial  value 2636.928893 
## iter  10 value 1468.259419
## iter  20 value 1457.601250
## iter  30 value 1281.943845
## iter  40 value 1151.232140
## iter  50 value 1079.561635
## iter  60 value 1057.953489
## iter  70 value 1047.326317
## iter  80 value 1045.161374
## iter  90 value 1043.685850
## iter 100 value 1043.652747
## final  value 1043.652747 
## stopped after 100 iterations
## # weights:  331
## initial  value 2933.924321 
## iter  10 value 1453.424804
## iter  20 value 1436.482981
## iter  30 value 1422.411935
## iter  40 value 1415.306331
## iter  50 value 1395.736239
## iter  60 value 1373.996481
## iter  70 value 1342.598418
## iter  80 value 1329.938753
## iter  90 value 1257.036356
## iter 100 value 1027.240686
## final  value 1027.240686 
## stopped after 100 iterations
## # weights:  551
## initial  value 5102.206419 
## iter  10 value 1620.946522
## iter  20 value 1462.233398
## iter  30 value 1452.253226
## iter  40 value 1436.868651
## iter  50 value 1419.646632
## iter  60 value 1374.499720
## iter  70 value 1364.272022
## iter  80 value 1333.275450
## iter  90 value 1273.713055
## iter 100 value 1200.037748
## final  value 1200.037748 
## stopped after 100 iterations
## # weights:  771
## initial  value 4176.782620 
## iter  10 value 1473.601787
## iter  20 value 1444.673774
## iter  30 value 1435.914901
## iter  40 value 1422.676316
## iter  50 value 1376.124151
## iter  60 value 1330.421199
## iter  70 value 1326.072765
## iter  80 value 1314.812803
## iter  90 value 1236.826179
## iter 100 value 1221.373137
## final  value 1221.373137 
## stopped after 100 iterations
## # weights:  221
## initial  value 2398.900929 
## iter  10 value 1439.719491
## iter  20 value 1432.069452
## iter  30 value 1430.355364
## iter  40 value 1416.393778
## iter  50 value 1397.143797
## iter  60 value 1393.493922
## final  value 1393.226234 
## converged
## # weights:  331
## initial  value 4784.442827 
## iter  10 value 1471.613920
## iter  20 value 1464.579482
## iter  30 value 1459.006499
## iter  40 value 1425.663469
## iter  50 value 1402.697636
## iter  60 value 1401.960954
## iter  70 value 1392.498789
## iter  80 value 1370.999354
## iter  90 value 1343.824223
## iter 100 value 1210.444177
## final  value 1210.444177 
## stopped after 100 iterations
## # weights:  551
## initial  value 4244.839569 
## iter  10 value 1471.585273
## iter  20 value 1462.049917
## iter  30 value 1424.398076
## iter  40 value 1409.093392
## iter  50 value 1401.481485
## iter  60 value 1392.838454
## iter  70 value 1378.989459
## iter  80 value 1377.123927
## iter  90 value 1376.139706
## iter 100 value 1375.273429
## final  value 1375.273429 
## stopped after 100 iterations
## # weights:  771
## initial  value 2047.729453 
## iter  10 value 1473.583468
## iter  20 value 1456.392547
## iter  30 value 1423.232831
## iter  40 value 1414.543663
## iter  50 value 1412.257873
## iter  60 value 1409.596927
## iter  70 value 1388.033894
## iter  80 value 1382.532539
## iter  90 value 1374.519560
## iter 100 value 1344.064404
## final  value 1344.064404 
## stopped after 100 iterations
## # weights:  221
## initial  value 6686.092826 
## iter  10 value 1668.962278
## iter  20 value 1516.548295
## iter  30 value 1443.822664
## iter  40 value 1426.711792
## iter  50 value 1399.522807
## iter  60 value 1398.867336
## iter  70 value 1397.036245
## iter  80 value 1386.393067
## iter  90 value 1320.816627
## iter 100 value 1221.568781
## final  value 1221.568781 
## stopped after 100 iterations
## # weights:  331
## initial  value 3014.616130 
## iter  10 value 1474.872398
## iter  20 value 1446.158806
## iter  30 value 1360.836601
## iter  40 value 1190.528729
## iter  50 value 1117.877457
## iter  60 value 1084.625007
## iter  70 value 1076.710308
## iter  80 value 1074.963430
## iter  90 value 1074.881126
## iter 100 value 1074.512192
## final  value 1074.512192 
## stopped after 100 iterations
## # weights:  551
## initial  value 6717.327313 
## iter  10 value 1673.648481
## iter  20 value 1651.254576
## iter  30 value 1445.438888
## iter  40 value 1445.200087
## iter  50 value 1439.782757
## iter  60 value 1429.063333
## iter  70 value 1405.977708
## iter  80 value 1391.384082
## iter  90 value 1315.214495
## iter 100 value 1243.699500
## final  value 1243.699500 
## stopped after 100 iterations
## # weights:  771
## initial  value 2965.067855 
## iter  10 value 1473.097898
## iter  20 value 1443.169315
## iter  30 value 1433.986250
## iter  40 value 1426.203150
## iter  50 value 1418.231668
## iter  60 value 1394.675980
## iter  70 value 1391.734802
## iter  80 value 1350.793840
## iter  90 value 1282.407739
## iter 100 value 1158.320625
## final  value 1158.320625 
## stopped after 100 iterations
## # weights:  221
## initial  value 6390.504817 
## iter  10 value 1477.769597
## iter  20 value 1464.837523
## iter  30 value 1442.096311
## iter  40 value 1420.459299
## iter  50 value 1396.465551
## iter  60 value 1230.529287
## iter  70 value 1169.601150
## iter  80 value 1126.646573
## iter  90 value 1084.043140
## iter 100 value 1001.464921
## final  value 1001.464921 
## stopped after 100 iterations
## # weights:  331
## initial  value 3361.124445 
## iter  10 value 1459.985739
## iter  20 value 1429.268291
## iter  30 value 1328.552884
## iter  40 value 1070.713798
## iter  50 value 1028.625594
## iter  60 value 1026.706393
## final  value 1026.625424 
## converged
## # weights:  551
## initial  value 4761.323456 
## iter  10 value 1455.260056
## iter  20 value 1437.605407
## iter  30 value 1423.793227
## iter  40 value 1330.242099
## iter  50 value 1225.524969
## iter  60 value 1204.906870
## iter  70 value 1155.498214
## iter  80 value 1059.054605
## iter  90 value 1039.869380
## iter 100 value 1023.465603
## final  value 1023.465603 
## stopped after 100 iterations
## # weights:  771
## initial  value 1669.619865 
## iter  10 value 1451.850212
## iter  20 value 1435.723319
## iter  30 value 1406.557746
## iter  40 value 1392.190299
## iter  50 value 1374.095793
## iter  60 value 1301.758400
## iter  70 value 1274.526920
## iter  80 value 1149.066283
## iter  90 value 1088.825158
## iter 100 value 1054.467401
## final  value 1054.467401 
## stopped after 100 iterations
## # weights:  221
## initial  value 4804.295432 
## iter  10 value 1472.756827
## iter  20 value 1472.110176
## iter  30 value 1472.044447
## iter  40 value 1466.089087
## iter  50 value 1410.754282
## iter  60 value 1396.222472
## iter  70 value 1350.890558
## iter  80 value 1246.561903
## iter  90 value 1147.473740
## iter 100 value 1140.339777
## final  value 1140.339777 
## stopped after 100 iterations
## # weights:  331
## initial  value 3298.120749 
## iter  10 value 1472.326209
## iter  20 value 1458.558007
## iter  30 value 1419.509844
## iter  40 value 1323.616529
## iter  50 value 1171.115521
## iter  60 value 1060.869627
## iter  70 value 1025.910988
## iter  80 value 1013.487355
## iter  90 value 1011.593944
## iter 100 value 1011.268930
## final  value 1011.268930 
## stopped after 100 iterations
## # weights:  551
## initial  value 5866.677144 
## iter  10 value 1461.812922
## iter  20 value 1457.246562
## iter  30 value 1429.547463
## iter  40 value 1295.832536
## iter  50 value 1178.216810
## iter  60 value 1143.135307
## iter  70 value 1129.080896
## iter  80 value 1122.959905
## iter  90 value 1097.501312
## iter 100 value 1010.708250
## final  value 1010.708250 
## stopped after 100 iterations
## # weights:  771
## initial  value 3071.871631 
## iter  10 value 1466.441731
## iter  20 value 1438.316847
## iter  30 value 1432.286345
## iter  40 value 1395.781508
## iter  50 value 1285.514639
## iter  60 value 1175.359101
## iter  70 value 1152.644601
## iter  80 value 1147.918115
## iter  90 value 1107.407902
## iter 100 value 1036.355254
## final  value 1036.355254 
## stopped after 100 iterations
## # weights:  221
## initial  value 2593.364767 
## iter  10 value 1498.988768
## iter  20 value 1472.911549
## iter  30 value 1441.064739
## iter  40 value 1426.524821
## iter  50 value 1398.359390
## iter  60 value 1390.766745
## iter  70 value 1378.018705
## iter  80 value 1367.441075
## iter  90 value 1351.338568
## iter 100 value 1300.389742
## final  value 1300.389742 
## stopped after 100 iterations
## # weights:  331
## initial  value 2449.457775 
## iter  10 value 1468.059192
## iter  20 value 1434.573716
## iter  30 value 1339.907100
## iter  40 value 1189.837536
## iter  50 value 1113.220989
## iter  60 value 1046.688534
## iter  70 value 1035.780491
## iter  80 value 1029.641756
## iter  90 value 1021.614939
## iter 100 value 1020.642055
## final  value 1020.642055 
## stopped after 100 iterations
## # weights:  551
## initial  value 5223.585908 
## iter  10 value 1476.340975
## iter  20 value 1471.943390
## iter  30 value 1458.884468
## iter  40 value 1449.623659
## iter  50 value 1416.939740
## iter  60 value 1405.551820
## iter  70 value 1388.484469
## iter  80 value 1318.078308
## iter  90 value 1170.539861
## iter 100 value 1101.509198
## final  value 1101.509198 
## stopped after 100 iterations
## # weights:  771
## initial  value 2598.410600 
## iter  10 value 1471.674802
## iter  20 value 1459.576226
## iter  30 value 1421.208839
## iter  40 value 1407.012400
## iter  50 value 1396.880802
## iter  60 value 1378.316513
## iter  70 value 1322.624729
## iter  80 value 1302.609627
## iter  90 value 1268.462702
## iter 100 value 1122.224023
## final  value 1122.224023 
## stopped after 100 iterations
## # weights:  331
## initial  value 6220.374208 
## iter  10 value 2207.449829
## iter  20 value 2171.396919
## iter  30 value 2145.880105
## iter  40 value 2142.914904
## iter  50 value 2142.521786
## iter  60 value 2107.920068
## iter  70 value 2096.600242
## iter  80 value 2089.615411
## iter  90 value 2050.313852
## iter 100 value 1882.385342
## final  value 1882.385342 
## stopped after 100 iterations
# Print the node-1 model: resampling results per size/decay combination and
# the selected values.
Adult_TDA_PC_5.60.5_n1_NN1Fit0
## Neural Network 
## 
## 6560 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 4374, 4373, 4373 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa     
##   2     0.3    0.8984758  0.15562165
##   2     0.5    0.8948171  0.00000000
##   2     0.7    0.8964944  0.04998668
##   3     0.3    0.9009151  0.19970302
##   3     0.5    0.8983226  0.18479662
##   3     0.7    0.8955791  0.11492999
##   5     0.3    0.8971041  0.12616547
##   5     0.5    0.8966461  0.10052084
##   5     0.7    0.8948171  0.00000000
##   7     0.3    0.8932930  0.24529946
##   7     0.5    0.8948171  0.00000000
##   7     0.7    0.8948171  0.00000000
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 3 and decay = 0.3.
# Per-fold resampling metrics of the selected node-1 model.
Adult_TDA_PC_5.60.5_n1_NN1Fit0$resample
##    Accuracy     Kappa Resample
## 1 0.8948331 0.0000000    Fold3
## 2 0.9039781 0.3271932    Fold2
## 3 0.9039341 0.2719158    Fold1
# Keep the Accuracy column (the first column of the resample table) as a
# one-column data frame.
ad_tda_pc_5.60.5_n1_nn1_fit_re <-
  Adult_TDA_PC_5.60.5_n1_NN1Fit0$resample["Accuracy"]

# Print the weights of the final node-1 network.
summary(Adult_TDA_PC_5.60.5_n1_NN1Fit0)
## a 108-3-1 network with 331 weights
## options were - entropy fitting  decay=0.3
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h3   i1->h3   i2->h3   i3->h3   i4->h3   i5->h3   i6->h3   i7->h3 
##     1.89    -0.21     0.54    -0.23    -0.12     0.00     4.80    -3.15 
##   i8->h3   i9->h3  i10->h3  i11->h3  i12->h3  i13->h3  i14->h3  i15->h3 
##    -0.35     0.40     0.00     0.00     0.57    -0.04     0.46     0.01 
##  i16->h3  i17->h3  i18->h3  i19->h3  i20->h3  i21->h3  i22->h3  i23->h3 
##     0.06     0.39     0.05     1.11    -2.41    -0.94    -0.06     2.78 
##  i24->h3  i25->h3  i26->h3  i27->h3  i28->h3  i29->h3  i30->h3  i31->h3 
##    -1.73     0.00    -0.02     1.65    -0.48     0.24    -0.03     1.70 
##  i32->h3  i33->h3  i34->h3  i35->h3  i36->h3  i37->h3  i38->h3  i39->h3 
##     0.00    -0.03     0.00     0.01     0.54     1.28     0.04     0.41 
##  i40->h3  i41->h3  i42->h3  i43->h3  i44->h3  i45->h3  i46->h3  i47->h3 
##    -2.70    -1.18     0.12     0.83     0.38     0.00    -0.74     1.06 
##  i48->h3  i49->h3  i50->h3  i51->h3  i52->h3  i53->h3  i54->h3  i55->h3 
##     2.01     0.76    -0.93     1.11     0.38     0.14     0.03    -0.02 
##  i56->h3  i57->h3  i58->h3  i59->h3  i60->h3  i61->h3  i62->h3  i63->h3 
##     0.24     0.20    -1.34     1.07     0.15     1.79     0.17     1.71 
##  i64->h3  i65->h3  i66->h3  i67->h3  i68->h3  i69->h3  i70->h3  i71->h3 
##     0.00     0.00    -0.06     0.53     0.19     1.32     0.18    -0.03 
##  i72->h3  i73->h3  i74->h3  i75->h3  i76->h3  i77->h3  i78->h3  i79->h3 
##     0.19     0.00    -0.02     0.04     0.03    -0.07     0.41    -1.25 
##  i80->h3  i81->h3  i82->h3  i83->h3  i84->h3  i85->h3  i86->h3  i87->h3 
##     0.00     0.00     0.00     0.00     0.09     0.13    -1.49    -1.20 
##  i88->h3  i89->h3  i90->h3  i91->h3  i92->h3  i93->h3  i94->h3  i95->h3 
##    -0.01     0.02     0.00     0.54    -0.02     0.03     0.00     0.00 
##  i96->h3  i97->h3  i98->h3  i99->h3 i100->h3 i101->h3 i102->h3 i103->h3 
##    -0.01     0.01    -0.08    -0.01     0.03     0.00     0.04     0.14 
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3 
##     0.00     0.00     2.05     0.06     0.06 
##  b->o h1->o h2->o h3->o 
##  0.44  0.44  0.44  2.48
# Variable-importance plot for the node-1 network, limited to 25 features.
vip(Adult_TDA_PC_5.60.5_n1_NN1Fit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.60.5_n1_NN1Fit TDA-Assisted NN")

# Score the test data with the node-1 network fitted above.
pred0 <- predict(
  Adult_TDA_PC_5.60.5_n1_NN1Fit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate predicted against observed test labels, then print the
# resulting confusion matrix and its statistics.
ad_tda_pc_5.60.5_n1_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n1_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K      0     0
##      >50K    7416  2352
##                                           
##                Accuracy : 0.2408          
##                  95% CI : (0.2323, 0.2494)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.0000          
##             Specificity : 1.0000          
##          Pos Pred Value :    NaN          
##          Neg Pred Value : 0.2408          
##              Prevalence : 0.7592          
##          Detection Rate : 0.0000          
##    Detection Prevalence : 0.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Print the same confusion-matrix object a second time; the recorded output
# below is identical to the one printed just before.
ad_tda_pc_5.60.5_n1_nn1_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K      0     0
##      >50K    7416  2352
##                                           
##                Accuracy : 0.2408          
##                  95% CI : (0.2323, 0.2494)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.0000          
##             Specificity : 1.0000          
##          Pos Pred Value :    NaN          
##          Neg Pred Value : 0.2408          
##              Prevalence : 0.7592          
##          Detection Rate : 0.0000          
##    Detection Prevalence : 0.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the test-set confusion matrix (recorded output follows).
ad_tda_pc_5.60.5_n1_nn1_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.2407862      0.0000000      0.2323343      0.2493929      0.7592138 
## AccuracyPValue  McnemarPValue 
##      1.0000000      0.0000000
# Keep the overall accuracy as a named length-one vector; selected by name
# instead of position so it does not depend on element order.
ad_tda_pc_5.60.5_n1_nn1_cf0_ov_acc <- ad_tda_pc_5.60.5_n1_nn1_cf0$overall["Accuracy"]
# Per-class statistics of the test-set confusion matrix (recorded output follows).
ad_tda_pc_5.60.5_n1_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.0000000            1.0000000                  NaN 
##       Neg Pred Value            Precision               Recall 
##            0.2407862                   NA            0.0000000 
##                   F1           Prevalence       Detection Rate 
##                   NA            0.7592138            0.0000000 
## Detection Prevalence    Balanced Accuracy 
##            0.0000000            0.5000000
# Keep precision, recall and F1; selected by name instead of positions 5:7 so
# the extraction does not depend on element order.
ad_tda_pc_5.60.5_n1_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n1_nn1_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers

### 3-fold diff

# Per-fold accuracy difference: ad_nn1_fit_re (presumably the non-TDA NN fit)
# minus the node-1 TDA-assisted NN fit.
# NOTE(review): the subtraction is positional (row 1 minus row 1, ...). The
# resample table printed for the node-1 fit lists its rows as Fold3, Fold2,
# Fold1, so confirm both inputs are in the same fold order.
diff_tda_pca_5.60.5_nn1_n1_3_fold<-(ad_nn1_fit_re - ad_tda_pc_5.60.5_n1_nn1_fit_re)
diff_tda_pca_5.60.5_nn1_n1_3_fold
##      Accuracy
## 1 -0.08688260
## 2 -0.05388592
## 3 -0.10253902
## Bayesian Tests 3-fold diff

# Bayesian sign test on the per-fold differences.
# The trailing -0.01 / 0.01 are presumably the lower/upper ROPE bounds -- confirm
# against the BayesianSignTest definition.
bst_tda_pca_5.60.5_nn1.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_nn1_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_nn1.n1_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Sign-test "odds left": probability mass on the left divided by the mass on
# the right.
# NOTE(review): this evaluates to Inf whenever probRight is 0, as in the
# recorded output below.
bst_tda_pca_5.60.5_nn1.n1_3_fold_odds.left <-
  bst_tda_pca_5.60.5_nn1.n1_3_fold[["probLeft"]] /
  bst_tda_pca_5.60.5_nn1.n1_3_fold[["probRight"]]
bst_tda_pca_5.60.5_nn1.n1_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the per-fold differences, called with the same
# -0.01 / 0.01 trailing arguments as the sign test.
bsr_tda_pca_5.60.5_nn1.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nn1_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_nn1.n1_3_fold
## $winLeft
## [1] 0.9906
## 
## $winRope
## [1] 0.0094
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the per-fold differences.
# NOTE(review): the second argument (0.1) is presumably the between-fold
# correlation rho. 0.1 matches the usual 1/k heuristic for 10-fold CV, whereas
# these differences come from 3 folds (1/3) -- confirm the intended value.
bct_tda_pca_5.60.5_nn1.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_nn1_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_nn1.n1_3_fold
## $left
## [1] 0.9749097
## 
## $rope
## [1] 0.009350311
## 
## $right
## [1] 0.01574002
# ROPE plot of the per-fold differences over the interval c(-0.01, 0.01)
plot(
  rope(diff_tda_pca_5.60.5_nn1_n1_3_fold, c(-0.01, 0.01))
)

# BayesFactor t-test (left disabled)
#bf_tda_pca_5.60.5_nn1.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nn1_n1_3_fold))
#bf_tda_pca_5.60.5_nn1.n1_3_fold

# One-sample t-test of the per-fold differences with default settings
t.test(x = as.matrix(diff_tda_pca_5.60.5_nn1_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.60.5_nn1_n1_3_fold)
## t = -5.656, df = 2, p-value = 0.02987
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.14279912 -0.01940591
## sample estimates:
##   mean of x 
## -0.08110251
### Test set diff
# Difference in overall test-set accuracy: nn1_cf_ov_acc minus the node-1
# TDA-assisted value. The recorded output shows a single number.
diff_tda_pca_5.60.5_nn1.n1_test <-
  nn1_cf_ov_acc - ad_tda_pc_5.60.5_n1_nn1_cf0_ov_acc
diff_tda_pca_5.60.5_nn1.n1_test
##  Accuracy 
## 0.5642916
## Bayesian Tests Test set diff

# Bayesian sign test on the test-set accuracy difference.
# NOTE(review): the input printed above is a single value, so the test is run
# on one observation -- confirm this is intended.
bst_tda_pca_5.60.5_nn1.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_nn1.n1_test),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_nn1.n1_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Sign-test "odds left" for the test-set difference: left probability divided
# by right probability.
bst_tda_pca_5.60.5_nn1.n1_test_odds.left <-
  bst_tda_pca_5.60.5_nn1.n1_test[["probLeft"]] /
  bst_tda_pca_5.60.5_nn1.n1_test[["probRight"]]
bst_tda_pca_5.60.5_nn1.n1_test_odds.left
## [1] 0
# Bayesian signed-rank test on the test-set accuracy difference, called with
# the same -0.01 / 0.01 trailing arguments as the sign test.
bsr_tda_pca_5.60.5_nn1.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nn1.n1_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_nn1.n1_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1597667
## 
## $winRight
## [1] 0.8402333
# Bayesian correlated t-test on the test-set accuracy difference.
# NOTE(review): the recorded result is NA for left/rope/right. The input is a
# single value, so presumably no variance can be estimated -- confirm whether
# this test should be run on the test-set difference at all.
bct_tda_pca_5.60.5_nn1.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_nn1.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_nn1.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_nn1.n1_test)))

#BayesFactor
#bf_tda_pca_5.60.5_nn1.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nn1.n1_test)) #bf_tda_pca_5.60.5_nn1.n1_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_nn1.n1_test))

##With TDA PCA filter 5 intervals, 60% overlap, 5 bins (overlap taken from the "5.60.5" object names; this header previously read 50% -- confirm)
##Node2

##Adult_TDA_PC_5.60.5_n2_NN1Fit0 <- nnet(as.factor(adult_df1) ~ ., data = tda.m_adult_5.60.5.n2.vec, size=2, range = 0.6,, type='class')

#Neural Network 1
# Tune a caret "nnet" model on the node-2 data, using the resampling scheme in
# fitControl and the tuning grid in nn1Grid; the best candidate is chosen by
# Accuracy.
# NOTE(review): `Importance` is forwarded through `...`; it is not a documented
# nnet() argument, so it is presumably ignored -- confirm it is needed.
Adult_TDA_PC_5.60.5_n2_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n2.vec,
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 6450.758066 
## iter  10 value 6309.697255
## iter  20 value 6248.242335
## iter  30 value 6196.168761
## iter  40 value 5996.884530
## iter  50 value 5657.588241
## iter  60 value 5298.713682
## iter  70 value 5245.241372
## iter  80 value 5238.941804
## iter  90 value 5214.397612
## iter 100 value 5182.454877
## final  value 5182.454877 
## stopped after 100 iterations
## # weights:  331
## initial  value 6464.083393 
## iter  10 value 6414.227813
## iter  20 value 6097.510821
## iter  30 value 6075.806560
## iter  40 value 6070.106615
## iter  50 value 6068.451097
## iter  60 value 6067.254096
## iter  70 value 5983.338486
## iter  80 value 5975.110529
## iter  90 value 5915.888353
## iter 100 value 5882.564742
## final  value 5882.564742 
## stopped after 100 iterations
## # weights:  551
## initial  value 6497.002383 
## iter  10 value 6342.322249
## iter  20 value 6320.966750
## iter  30 value 6073.101962
## iter  40 value 5966.906998
## iter  50 value 5886.926247
## iter  60 value 5857.718042
## iter  70 value 5849.295641
## iter  80 value 5846.151842
## iter  90 value 5831.903798
## iter 100 value 5814.817319
## final  value 5814.817319 
## stopped after 100 iterations
## # weights:  771
## initial  value 6663.906464 
## iter  10 value 6106.691945
## iter  20 value 6083.096238
## iter  30 value 6079.189633
## iter  40 value 6043.834145
## iter  50 value 5985.489271
## iter  60 value 5971.823402
## iter  70 value 5957.127044
## iter  80 value 5954.335935
## iter  90 value 5949.622641
## iter 100 value 5899.577893
## final  value 5899.577893 
## stopped after 100 iterations
## # weights:  221
## initial  value 6469.576535 
## iter  10 value 6408.124724
## iter  20 value 6095.944211
## iter  30 value 6095.222245
## iter  40 value 6085.754226
## iter  50 value 6073.133829
## iter  60 value 6031.848022
## iter  70 value 6012.941050
## iter  80 value 5791.573699
## iter  90 value 5579.078082
## iter 100 value 5513.679054
## final  value 5513.679054 
## stopped after 100 iterations
## # weights:  331
## initial  value 7071.604647 
## iter  10 value 6199.429926
## iter  20 value 6107.176904
## iter  30 value 6083.139209
## iter  40 value 6068.958497
## iter  50 value 6027.838000
## iter  60 value 5910.421989
## iter  70 value 5367.200409
## iter  80 value 5247.915977
## iter  90 value 5035.096382
## iter 100 value 4875.075219
## final  value 4875.075219 
## stopped after 100 iterations
## # weights:  551
## initial  value 6797.450204 
## iter  10 value 6238.221082
## iter  20 value 6083.042615
## iter  30 value 5970.486123
## iter  40 value 5951.843897
## iter  50 value 5945.713088
## iter  60 value 5932.806739
## iter  70 value 5929.105817
## iter  80 value 5903.284893
## iter  90 value 5879.820730
## iter 100 value 5788.999231
## final  value 5788.999231 
## stopped after 100 iterations
## # weights:  771
## initial  value 6825.407432 
## iter  10 value 6444.279893
## iter  20 value 6291.119891
## iter  30 value 6036.976272
## iter  40 value 6020.378858
## iter  50 value 6006.723295
## iter  60 value 5965.784117
## iter  70 value 5925.135141
## iter  80 value 5886.637105
## iter  90 value 5869.776959
## iter 100 value 5847.939806
## final  value 5847.939806 
## stopped after 100 iterations
## # weights:  221
## initial  value 7590.937235 
## iter  10 value 6395.389194
## iter  20 value 6059.949797
## iter  30 value 6022.300390
## iter  40 value 6009.308271
## iter  50 value 6006.745075
## iter  60 value 6003.718671
## iter  70 value 5931.252209
## iter  80 value 5706.096137
## iter  90 value 5418.028144
## iter 100 value 5281.933657
## final  value 5281.933657 
## stopped after 100 iterations
## # weights:  331
## initial  value 9481.391710 
## iter  10 value 6432.446535
## iter  20 value 6115.958665
## iter  30 value 5996.286711
## iter  40 value 5958.451670
## iter  50 value 5934.514790
## iter  60 value 5928.559269
## iter  70 value 5857.454597
## iter  80 value 5826.449473
## iter  90 value 5755.948928
## iter 100 value 5508.594845
## final  value 5508.594845 
## stopped after 100 iterations
## # weights:  551
## initial  value 7246.565546 
## iter  10 value 6394.702654
## iter  20 value 6098.687050
## iter  30 value 6090.530118
## iter  40 value 6074.369959
## iter  50 value 5951.631917
## iter  60 value 5870.390082
## iter  70 value 5806.713153
## iter  80 value 5797.267147
## iter  90 value 5791.938350
## iter 100 value 5783.234809
## final  value 5783.234809 
## stopped after 100 iterations
## # weights:  771
## initial  value 8277.470395 
## iter  10 value 6436.501778
## iter  20 value 6097.585757
## iter  30 value 5997.125779
## iter  40 value 5967.964505
## iter  50 value 5908.146318
## iter  60 value 5778.852639
## iter  70 value 5627.741895
## iter  80 value 5203.759599
## iter  90 value 5008.078551
## iter 100 value 4922.781490
## final  value 4922.781490 
## stopped after 100 iterations
## # weights:  221
## initial  value 6925.399932 
## iter  10 value 6303.117914
## iter  20 value 6098.541192
## iter  30 value 6096.841079
## iter  40 value 6082.569209
## iter  50 value 5990.770846
## iter  60 value 5950.845740
## iter  70 value 5815.994997
## iter  80 value 5676.894110
## iter  90 value 5476.730729
## iter 100 value 5341.778705
## final  value 5341.778705 
## stopped after 100 iterations
## # weights:  331
## initial  value 7391.644042 
## iter  10 value 6381.995409
## iter  20 value 6139.153122
## iter  30 value 6072.266596
## iter  40 value 5965.968694
## iter  50 value 5860.338451
## iter  60 value 5541.473821
## iter  70 value 5165.721634
## iter  80 value 5149.675476
## iter  90 value 5139.466824
## iter 100 value 5097.839933
## final  value 5097.839933 
## stopped after 100 iterations
## # weights:  551
## initial  value 7849.559342 
## iter  10 value 6177.728556
## iter  20 value 6070.020672
## iter  30 value 6018.883008
## iter  40 value 5999.198181
## iter  50 value 5995.800890
## iter  60 value 5968.836645
## iter  70 value 5949.764938
## iter  80 value 5928.142121
## iter  90 value 5886.958538
## iter 100 value 5525.389143
## final  value 5525.389143 
## stopped after 100 iterations
## # weights:  771
## initial  value 6618.520897 
## iter  10 value 6247.712375
## iter  20 value 6066.320161
## iter  30 value 6050.380936
## iter  40 value 6031.415884
## iter  50 value 5989.421751
## iter  60 value 5986.485814
## iter  70 value 5983.277920
## iter  80 value 5982.758703
## iter  90 value 5982.646340
## iter 100 value 5976.554420
## final  value 5976.554420 
## stopped after 100 iterations
## # weights:  221
## initial  value 6624.386623 
## iter  10 value 6352.539039
## iter  20 value 6351.105513
## iter  30 value 6091.494135
## iter  40 value 6087.237195
## iter  50 value 6080.431511
## iter  60 value 5984.655430
## iter  70 value 5979.888775
## iter  80 value 5936.402931
## iter  90 value 5887.083004
## iter 100 value 5803.625187
## final  value 5803.625187 
## stopped after 100 iterations
## # weights:  331
## initial  value 6895.917023 
## iter  10 value 6421.541243
## iter  20 value 6337.756038
## iter  30 value 6039.159866
## iter  40 value 5989.582412
## iter  50 value 5961.452230
## iter  60 value 5950.796425
## iter  70 value 5925.505243
## iter  80 value 5885.622689
## iter  90 value 5832.072678
## iter 100 value 5812.019580
## final  value 5812.019580 
## stopped after 100 iterations
## # weights:  551
## initial  value 6476.899030 
## iter  10 value 6252.818280
## iter  20 value 6091.410725
## iter  30 value 6050.060853
## iter  40 value 6032.413629
## iter  50 value 5988.622817
## iter  60 value 5952.673875
## iter  70 value 5913.003199
## iter  80 value 5901.609662
## iter  90 value 5889.393522
## iter 100 value 5880.620215
## final  value 5880.620215 
## stopped after 100 iterations
## # weights:  771
## initial  value 6656.295216 
## iter  10 value 6433.362740
## iter  20 value 6262.960624
## iter  30 value 6240.940926
## iter  40 value 6118.607473
## iter  50 value 6094.179353
## iter  60 value 5965.476099
## iter  70 value 5940.319212
## iter  80 value 5919.263886
## iter  90 value 5912.172506
## iter 100 value 5892.421706
## final  value 5892.421706 
## stopped after 100 iterations
## # weights:  221
## initial  value 6642.680049 
## iter  10 value 6437.105360
## iter  20 value 6435.482249
## iter  30 value 6120.768080
## iter  40 value 6064.079930
## iter  50 value 6033.160039
## iter  60 value 6009.067464
## iter  70 value 5942.317943
## iter  80 value 5936.754862
## iter  90 value 5926.279070
## iter 100 value 5908.055881
## final  value 5908.055881 
## stopped after 100 iterations
## # weights:  331
## initial  value 7288.128127 
## iter  10 value 6434.637682
## iter  20 value 6308.756666
## iter  30 value 6095.085952
## iter  40 value 6086.422230
## iter  50 value 6032.806164
## iter  60 value 5980.851270
## iter  70 value 5966.151669
## iter  80 value 5958.776633
## iter  90 value 5935.804513
## iter 100 value 5885.095545
## final  value 5885.095545 
## stopped after 100 iterations
## # weights:  551
## initial  value 7280.024190 
## iter  10 value 6335.326059
## iter  20 value 6056.785859
## iter  30 value 6035.410784
## iter  40 value 5963.693831
## iter  50 value 5946.522615
## iter  60 value 5860.359957
## iter  70 value 5745.775932
## iter  80 value 5101.190442
## iter  90 value 4978.885524
## iter 100 value 4969.186569
## final  value 4969.186569 
## stopped after 100 iterations
## # weights:  771
## initial  value 6513.864583 
## iter  10 value 6233.480247
## iter  20 value 6033.096419
## iter  30 value 5989.360453
## iter  40 value 5968.854361
## iter  50 value 5947.373119
## iter  60 value 5929.322952
## iter  70 value 5906.118051
## iter  80 value 5874.695975
## iter  90 value 5869.020232
## iter 100 value 5854.020123
## final  value 5854.020123 
## stopped after 100 iterations
## # weights:  221
## initial  value 6759.508796 
## iter  10 value 6393.448276
## iter  20 value 6086.022497
## iter  30 value 6082.809885
## iter  40 value 6073.483565
## iter  50 value 6005.288971
## iter  60 value 6000.193054
## iter  70 value 5997.340763
## iter  80 value 5993.426245
## iter  90 value 5986.452621
## iter 100 value 5972.010801
## final  value 5972.010801 
## stopped after 100 iterations
## # weights:  331
## initial  value 6538.759378 
## final  value 6437.106937 
## converged
## # weights:  551
## initial  value 8902.006468 
## iter  10 value 6372.210642
## iter  20 value 6077.065228
## iter  30 value 6011.501226
## iter  40 value 5951.786579
## iter  50 value 5913.538491
## iter  60 value 5897.675722
## iter  70 value 5675.585555
## iter  80 value 5171.148844
## iter  90 value 5116.136813
## iter 100 value 4879.242661
## final  value 4879.242661 
## stopped after 100 iterations
## # weights:  771
## initial  value 6610.753745 
## iter  10 value 6329.226245
## iter  20 value 6322.016688
## iter  30 value 6316.591281
## iter  40 value 6121.947100
## iter  50 value 6098.074794
## iter  60 value 6086.135409
## iter  70 value 6083.416589
## iter  80 value 6077.461012
## iter  90 value 6076.619014
## iter 100 value 6076.140908
## final  value 6076.140908 
## stopped after 100 iterations
## # weights:  221
## initial  value 6865.444058 
## final  value 6437.106959 
## converged
## # weights:  331
## initial  value 7678.996080 
## iter  10 value 6234.863234
## iter  20 value 6127.758861
## iter  30 value 6081.628029
## iter  40 value 6046.713176
## iter  50 value 6037.762082
## iter  60 value 6035.505474
## iter  70 value 6033.149920
## iter  80 value 6016.839941
## iter  90 value 5988.729390
## iter 100 value 5959.084505
## final  value 5959.084505 
## stopped after 100 iterations
## # weights:  551
## initial  value 6544.462069 
## iter  10 value 6348.906466
## iter  20 value 6110.308343
## iter  30 value 5883.450867
## iter  40 value 5507.389633
## iter  50 value 5169.685370
## iter  60 value 5076.601583
## iter  70 value 4972.357492
## iter  80 value 4822.940151
## iter  90 value 4749.223835
## iter 100 value 4735.243882
## final  value 4735.243882 
## stopped after 100 iterations
## # weights:  771
## initial  value 10208.858574 
## iter  10 value 6182.653395
## iter  20 value 6128.839466
## iter  30 value 6086.772274
## iter  40 value 6066.033674
## iter  50 value 6061.639103
## iter  60 value 6021.280931
## iter  70 value 5943.638463
## iter  80 value 5672.054013
## iter  90 value 5481.925027
## iter 100 value 5439.811670
## final  value 5439.811670 
## stopped after 100 iterations
## # weights:  221
## initial  value 7670.662511 
## iter  10 value 6439.296940
## iter  20 value 6437.141166
## final  value 6437.107081 
## converged
## # weights:  331
## initial  value 6552.235676 
## iter  10 value 6437.091320
## iter  10 value 6437.091315
## iter  20 value 6381.209589
## iter  30 value 6121.638836
## iter  40 value 5988.769277
## iter  50 value 5807.606745
## iter  60 value 5452.892825
## iter  70 value 5224.649769
## iter  80 value 4992.962262
## iter  90 value 4879.674316
## iter 100 value 4824.727422
## final  value 4824.727422 
## stopped after 100 iterations
## # weights:  551
## initial  value 6523.176048 
## iter  10 value 6433.201890
## iter  20 value 6013.465538
## iter  30 value 5961.475721
## iter  40 value 5951.215937
## iter  50 value 5912.440432
## iter  60 value 5903.120227
## iter  70 value 5872.344436
## iter  80 value 5864.003517
## iter  90 value 5859.021385
## iter 100 value 5843.318663
## final  value 5843.318663 
## stopped after 100 iterations
## # weights:  771
## initial  value 6720.488842 
## iter  10 value 6416.407577
## iter  20 value 6111.334930
## iter  30 value 6010.933784
## iter  40 value 5985.265001
## iter  50 value 5979.868989
## iter  60 value 5975.110660
## iter  70 value 5973.960533
## iter  80 value 5968.546437
## iter  90 value 5961.932420
## iter 100 value 5946.386875
## final  value 5946.386875 
## stopped after 100 iterations
## # weights:  551
## initial  value 9734.604067 
## iter  10 value 9649.529011
## iter  20 value 9143.027823
## iter  30 value 9076.401701
## iter  40 value 8986.672925
## iter  50 value 8918.585958
## iter  60 value 8896.008300
## iter  70 value 8850.395539
## iter  80 value 8752.145731
## iter  90 value 8644.482881
## iter 100 value 8517.743302
## final  value 8517.743302 
## stopped after 100 iterations
# Print the tuning summary of the node-2 fit: resampled Accuracy/Kappa for each
# size/decay combination and the selected values (recorded output follows).
Adult_TDA_PC_5.60.5_n2_NN1Fit0
## Neural Network 
## 
## 13933 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 9288, 9289, 9289 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa    
##   2     0.3    0.6709226  0.3451319
##   2     0.5    0.6118503  0.2177579
##   2     0.7    0.6080457  0.2132917
##   3     0.3    0.6141545  0.2255223
##   3     0.5    0.6580730  0.3209352
##   3     0.7    0.6690599  0.3418958
##   5     0.3    0.6779642  0.3596250
##   5     0.5    0.6514065  0.3091927
##   5     0.7    0.6537035  0.3140368
##   7     0.3    0.5874536  0.1853705
##   7     0.5    0.6397070  0.2861448
##   7     0.7    0.6510383  0.3081234
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 5 and decay = 0.3.
# Per-fold resampling metrics stored on the node-2 fit (recorded output follows).
Adult_TDA_PC_5.60.5_n2_NN1Fit0$resample
##    Accuracy     Kappa Resample
## 1 0.6023681 0.2148234    Fold1
## 2 0.7353575 0.4699286    Fold3
## 3 0.6961671 0.3941229    Fold2
# Keep the per-fold accuracies as a one-column data frame. The column is picked
# by name rather than by position so the result does not depend on column order.
ad_tda_pc_5.60.5_n2_nn1_fit_re <- Adult_TDA_PC_5.60.5_n2_NN1Fit0$resample["Accuracy"]

# Print the fitted node-2 network: its layout and connection weights
# (recorded output follows).
summary(Adult_TDA_PC_5.60.5_n2_NN1Fit0)
## a 108-5-1 network with 551 weights
## options were - entropy fitting  decay=0.3
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##    -0.76    -0.01     0.20    -0.01     0.39     0.00    -1.00     0.33 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##    -0.47    -0.20     0.00     0.00    -0.75     0.83    -0.08     0.01 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.41    -0.11    -0.86     0.29     0.37     1.14    -0.87    -1.95 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.66     0.00     0.22    -0.07    -0.01     0.49    -0.01    -1.91 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.09     0.36     0.22     0.00     0.20    -0.04    -0.01    -1.03 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     1.29    -0.37    -0.52    -0.10    -0.29     0.00     0.54     0.39 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##    -1.08     0.45    -0.18    -3.00     0.95    -0.04     0.09     0.18 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     1.05    -0.22    -0.09     1.00     0.25    -1.70     1.33    -2.09 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.00     0.00    -0.05     0.16     0.01    -0.57     0.02    -0.01 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##    -0.13     0.00     0.01    -0.02    -0.04     0.04    -0.58    -0.15 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##    -0.01    -0.04     0.00     0.01     0.02     0.00    -0.08    -0.07 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.00    -0.19     0.01    -0.03     0.00    -0.29     0.16     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.14     0.01     0.02     0.26     0.00    -0.10     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.01     0.00     0.70    -0.04     0.02 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h3   i1->h3   i2->h3   i3->h3   i4->h3   i5->h3   i6->h3   i7->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h3   i9->h3  i10->h3  i11->h3  i12->h3  i13->h3  i14->h3  i15->h3 
##     0.00     0.00     0.00     0.01     0.00     0.00     0.00     0.00 
##  i16->h3  i17->h3  i18->h3  i19->h3  i20->h3  i21->h3  i22->h3  i23->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h3  i25->h3  i26->h3  i27->h3  i28->h3  i29->h3  i30->h3  i31->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h3  i33->h3  i34->h3  i35->h3  i36->h3  i37->h3  i38->h3  i39->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h3  i41->h3  i42->h3  i43->h3  i44->h3  i45->h3  i46->h3  i47->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h3  i49->h3  i50->h3  i51->h3  i52->h3  i53->h3  i54->h3  i55->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h3  i57->h3  i58->h3  i59->h3  i60->h3  i61->h3  i62->h3  i63->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h3  i65->h3  i66->h3  i67->h3  i68->h3  i69->h3  i70->h3  i71->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h3  i73->h3  i74->h3  i75->h3  i76->h3  i77->h3  i78->h3  i79->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h3  i81->h3  i82->h3  i83->h3  i84->h3  i85->h3  i86->h3  i87->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h3  i89->h3  i90->h3  i91->h3  i92->h3  i93->h3  i94->h3  i95->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h3  i97->h3  i98->h3  i99->h3 i100->h3 i101->h3 i102->h3 i103->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h4   i1->h4   i2->h4   i3->h4   i4->h4   i5->h4   i6->h4   i7->h4 
##    -0.22     0.31    -0.06     0.13     0.12     0.00     0.04     0.05 
##   i8->h4   i9->h4  i10->h4  i11->h4  i12->h4  i13->h4  i14->h4  i15->h4 
##    -0.50     0.00     0.00     0.00    -0.05    -0.14    -0.05     0.05 
##  i16->h4  i17->h4  i18->h4  i19->h4  i20->h4  i21->h4  i22->h4  i23->h4 
##     0.06     0.03    -0.02    -0.04    -0.01     0.57     0.04    -0.67 
##  i24->h4  i25->h4  i26->h4  i27->h4  i28->h4  i29->h4  i30->h4  i31->h4 
##     0.44     0.00    -0.04    -0.37     1.70     0.23     0.00    -0.69 
##  i32->h4  i33->h4  i34->h4  i35->h4  i36->h4  i37->h4  i38->h4  i39->h4 
##     0.03     0.15     0.01     0.05    -0.06     0.08     0.02    -0.36 
##  i40->h4  i41->h4  i42->h4  i43->h4  i44->h4  i45->h4  i46->h4  i47->h4 
##     0.34    -0.23    -0.13    -0.22    -0.17     0.00     0.60     0.02 
##  i48->h4  i49->h4  i50->h4  i51->h4  i52->h4  i53->h4  i54->h4  i55->h4 
##     0.23    -0.11    -0.22    -1.17     0.31     0.03     0.02     0.09 
##  i56->h4  i57->h4  i58->h4  i59->h4  i60->h4  i61->h4  i62->h4  i63->h4 
##     0.51    -0.07    -0.12     0.07     0.00    -0.10     0.61    -0.83 
##  i64->h4  i65->h4  i66->h4  i67->h4  i68->h4  i69->h4  i70->h4  i71->h4 
##     0.12    -0.32    -0.52    -0.12    -0.01     0.04    -0.06    -0.01 
##  i72->h4  i73->h4  i74->h4  i75->h4  i76->h4  i77->h4  i78->h4  i79->h4 
##    -0.04     0.00     0.00     0.07     0.01     0.00     0.01    -0.01 
##  i80->h4  i81->h4  i82->h4  i83->h4  i84->h4  i85->h4  i86->h4  i87->h4 
##     0.00     0.06     0.00     0.00    -0.09     0.00     0.03     0.02 
##  i88->h4  i89->h4  i90->h4  i91->h4  i92->h4  i93->h4  i94->h4  i95->h4 
##     0.01     0.01     0.00     0.02     0.00     0.03     0.02     0.00 
##  i96->h4  i97->h4  i98->h4  i99->h4 i100->h4 i101->h4 i102->h4 i103->h4 
##     0.01    -0.01     0.01     0.00     0.02     0.00     0.01     0.01 
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4 
##     0.00     0.00    -0.25    -0.01     0.00 
##    b->h5   i1->h5   i2->h5   i3->h5   i4->h5   i5->h5   i6->h5   i7->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h5   i9->h5  i10->h5  i11->h5  i12->h5  i13->h5  i14->h5  i15->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h5  i17->h5  i18->h5  i19->h5  i20->h5  i21->h5  i22->h5  i23->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h5  i25->h5  i26->h5  i27->h5  i28->h5  i29->h5  i30->h5  i31->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h5  i33->h5  i34->h5  i35->h5  i36->h5  i37->h5  i38->h5  i39->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h5  i41->h5  i42->h5  i43->h5  i44->h5  i45->h5  i46->h5  i47->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h5  i49->h5  i50->h5  i51->h5  i52->h5  i53->h5  i54->h5  i55->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h5  i57->h5  i58->h5  i59->h5  i60->h5  i61->h5  i62->h5  i63->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h5  i65->h5  i66->h5  i67->h5  i68->h5  i69->h5  i70->h5  i71->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h5  i73->h5  i74->h5  i75->h5  i76->h5  i77->h5  i78->h5  i79->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h5  i81->h5  i82->h5  i83->h5  i84->h5  i85->h5  i86->h5  i87->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h5  i89->h5  i90->h5  i91->h5  i92->h5  i93->h5  i94->h5  i95->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h5  i97->h5  i98->h5  i99->h5 i100->h5 i101->h5 i102->h5 i103->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5 
##     0.00     0.00     0.00     0.00     0.00 
##  b->o h1->o h2->o h3->o h4->o h5->o 
##  0.36  3.95  0.36  0.38 -1.44  0.42
# Variable-importance plot for the node-2 TDA-assisted network, limited to
# 25 features. The title typo ("Assited") is corrected.
vip(Adult_TDA_PC_5.60.5_n2_NN1Fit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.60.5_n2_NN1Fit TDA-Assisted NN")

# Apply the node-2 TDA-assisted network to the testing data.
pred0 <- predict(
  Adult_TDA_PC_5.60.5_n2_NN1Fit0,
  newdata = adult.one_hot_df4Test
)

# Compare the predicted classes with the observed test labels.
ad_tda_pc_5.60.5_n2_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n2_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   3895  1431
##      >50K    3521   921
##                                          
##                Accuracy : 0.493          
##                  95% CI : (0.4831, 0.503)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 1              
##                                          
##                   Kappa : -0.0638        
##                                          
##  Mcnemar's Test P-Value : <2e-16         
##                                          
##             Sensitivity : 0.5252         
##             Specificity : 0.3916         
##          Pos Pred Value : 0.7313         
##          Neg Pred Value : 0.2073         
##              Prevalence : 0.7592         
##          Detection Rate : 0.3988         
##    Detection Prevalence : 0.5452         
##       Balanced Accuracy : 0.4584         
##                                          
##        'Positive' Class :  <=50K         
## 
# Show the same confusion matrix again (repeats the print above).
ad_tda_pc_5.60.5_n2_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   3895  1431
##      >50K    3521   921
##                                          
##                Accuracy : 0.493          
##                  95% CI : (0.4831, 0.503)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 1              
##                                          
##                   Kappa : -0.0638        
##                                          
##  Mcnemar's Test P-Value : <2e-16         
##                                          
##             Sensitivity : 0.5252         
##             Specificity : 0.3916         
##          Pos Pred Value : 0.7313         
##          Neg Pred Value : 0.2073         
##              Prevalence : 0.7592         
##          Detection Rate : 0.3988         
##    Detection Prevalence : 0.5452         
##       Balanced Accuracy : 0.4584         
##                                          
##        'Positive' Class :  <=50K         
## 
# Overall test-set statistics of the confusion matrix.
ad_tda_pc_5.60.5_n2_nn1_cf0[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   4.930385e-01  -6.383549e-02   4.830761e-01   5.030051e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.000000e+00  1.172644e-193
# Store the test-set accuracy for the later model comparison. It is selected
# by name rather than by position, so the extraction does not depend on the
# ordering of the `overall` vector.
ad_tda_pc_5.60.5_n2_nn1_cf0_ov_acc <-
  ad_tda_pc_5.60.5_n2_nn1_cf0$overall["Accuracy"]

# Per-class statistics of the confusion matrix.
ad_tda_pc_5.60.5_n2_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.5252157            0.3915816            0.7313181 
##       Neg Pred Value            Precision               Recall 
##            0.2073390            0.7313181            0.5252157 
##                   F1           Prevalence       Detection Rate 
##            0.6113640            0.7592138            0.3987510 
## Detection Prevalence    Balanced Accuracy 
##            0.5452498            0.4583987
# Store precision, recall and F1. They are selected by name (the same elements
# as positions 5:7 in the printed `byClass` vector), so the extraction does not
# depend on element order.
ad_tda_pc_5.60.5_n2_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n2_nn1_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers

### 3-fold diff

# Per-fold accuracy difference: ad_nn1_fit_re minus the node-2 TDA-assisted fit.
diff_tda_pca_5.60.5_nn1_n2_3_fold <-
  ad_nn1_fit_re - ad_tda_pc_5.60.5_n2_nn1_fit_re
diff_tda_pca_5.60.5_nn1_n2_3_fold
##    Accuracy
## 1 0.2055824
## 2 0.1147347
## 3 0.1052280
## Bayesian Tests 3-fold diff

# Bayesian sign test on the fold differences, bounds -0.01 and 0.01
bst_tda_pca_5.60.5_nn1.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_nn1_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_nn1.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Ratio of the sign test's "left" probability to its "right" probability
bst_tda_pca_5.60.5_nn1.n2_3_fold_odds.left <- with(
  bst_tda_pca_5.60.5_nn1.n2_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.60.5_nn1.n2_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the fold differences, bounds -0.01 and 0.01
bsr_tda_pca_5.60.5_nn1.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nn1_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_nn1.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.009666667
## 
## $winRight
## [1] 0.9903333
# Bayesian Correlated Test
# Runs correlatedBayesianTtest on the three per-fold accuracy differences with
# bounds -0.01 and 0.01, then prints the left/rope/right components.
# NOTE(review): the second argument is 0.1. If it is the correlation (rho) of
# the correlated Bayesian t-test, which is commonly set to 1/k for k-fold
# cross-validation, the 3-fold resampling used here would suggest 1/3.
# Confirm against the function definition before relying on these values.

bct_tda_pca_5.60.5_nn1.n2_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.60.5_nn1_n2_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.60.5_nn1.n2_3_fold
## $left
## [1] 0.02718863
## 
## $rope
## [1] 0.007957845
## 
## $right
## [1] 0.9648535
# ROPE plot for the fold differences
plot(
  rope(diff_tda_pca_5.60.5_nn1_n2_3_fold, c(-0.01, 0.01))
)

# Bayes factor (disabled)
#bf_tda_pca_5.60.5_nn1.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nn1_n2_3_fold))
#bf_tda_pca_5.60.5_nn1.n2_3_fold

# One-sample t-test of the fold differences
t.test(
  as.matrix(diff_tda_pca_5.60.5_nn1_n2_3_fold)
)
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.60.5_nn1_n2_3_fold)
## t = 4.4348, df = 2, p-value = 0.04727
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.004228177 0.279468526
## sample estimates:
## mean of x 
## 0.1418484
### Test set diff
# Test-set accuracy difference: nn1_cf_ov_acc minus the node-2 TDA-assisted fit.
diff_tda_pca_5.60.5_nn1.n2_test <-
  nn1_cf_ov_acc - ad_tda_pc_5.60.5_n2_nn1_cf0_ov_acc
diff_tda_pca_5.60.5_nn1.n2_test
##  Accuracy 
## 0.3120393
## Bayesian Tests Test set diff

# Bayesian sign test on the test-set difference, bounds -0.01 and 0.01
bst_tda_pca_5.60.5_nn1.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_nn1.n2_test),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_nn1.n2_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Ratio of the sign test's "left" probability to its "right" probability
bst_tda_pca_5.60.5_nn1.n2_test_odds.left <- with(
  bst_tda_pca_5.60.5_nn1.n2_test,
  probLeft / probRight
)
bst_tda_pca_5.60.5_nn1.n2_test_odds.left
## [1] 0
# Bayesian signed-rank test on the test-set difference, bounds -0.01 and 0.01
bsr_tda_pca_5.60.5_nn1.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nn1.n2_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_nn1.n2_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1595
## 
## $winRight
## [1] 0.8405
# Bayesian Correlated Test
# Runs correlatedBayesianTtest on the test-set accuracy difference, which is a
# single value (see the printed diff above). Every component of the result is
# NA in the recorded output.
# NOTE(review): presumably the variance estimate is undefined for a single
# observation, so this test is not informative here. Confirm against the
# function definition.

bct_tda_pca_5.60.5_nn1.n2_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.60.5_nn1.n2_test),0.1,-0.01,0.01)
bct_tda_pca_5.60.5_nn1.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_nn1.n2_test)))

#BayesFactor
#bf_tda_pca_5.60.5_nn1.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nn1.n2_test)) #bf_tda_pca_5.60.5_nn1.n2_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_nn1.n2_test))


## With TDA PCA filter 5 intervals, 50% overlap, 5 bins
## NOTE(review): the object names use "5.60.5", which may indicate 60% overlap;
## confirm against the mapper call.
## Node3

# Neural Network 1
# Fit a caret "nnet" model on the node-3 data, using the resampling scheme in
# fitControl and the tuning grid in nn1Grid, and select the model by accuracy.
Adult_TDA_PC_5.60.5_n3_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n3.vec,
  # TRUE instead of T: T is an ordinary variable that can be reassigned.
  # NOTE(review): `Importance` is not among the documented arguments of
  # caret::train or nnet::nnet, so it is probably ignored. Confirm.
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 8020.772569 
## iter  10 value 5969.530145
## iter  20 value 5968.527188
## iter  30 value 5949.360754
## iter  40 value 5908.199289
## iter  50 value 5883.980449
## iter  60 value 5595.301300
## iter  70 value 5364.202127
## iter  80 value 5143.340234
## iter  90 value 4881.575685
## iter 100 value 4787.072563
## final  value 4787.072563 
## stopped after 100 iterations
## # weights:  331
## initial  value 7876.574395 
## iter  10 value 5640.144786
## iter  20 value 5609.583173
## iter  30 value 5528.735964
## iter  40 value 5460.248702
## iter  50 value 5437.382420
## iter  60 value 5429.217403
## iter  70 value 5407.786755
## iter  80 value 5400.048523
## iter  90 value 5381.470321
## iter 100 value 5347.146450
## final  value 5347.146450 
## stopped after 100 iterations
## # weights:  551
## initial  value 9682.572812 
## iter  10 value 5968.539665
## iter  20 value 5968.445277
## iter  30 value 5967.123101
## iter  40 value 5824.993754
## iter  50 value 5653.073291
## iter  60 value 5618.543443
## iter  70 value 5580.653303
## iter  80 value 5575.313823
## iter  90 value 5473.017319
## iter 100 value 5442.425101
## final  value 5442.425101 
## stopped after 100 iterations
## # weights:  771
## initial  value 6814.682943 
## iter  10 value 5967.890203
## iter  20 value 5870.847057
## iter  30 value 5715.317568
## iter  40 value 5648.507585
## iter  50 value 5626.354075
## iter  60 value 5593.885581
## iter  70 value 5567.226873
## iter  80 value 5554.566383
## iter  90 value 5504.902192
## iter 100 value 5487.933203
## final  value 5487.933203 
## stopped after 100 iterations
## # weights:  221
## initial  value 6595.657007 
## iter  10 value 5967.116369
## iter  20 value 5924.877302
## iter  30 value 5887.341950
## iter  40 value 5468.628120
## iter  50 value 5433.142966
## iter  60 value 5353.283782
## iter  70 value 5306.936054
## iter  80 value 5251.047783
## iter  90 value 5115.378075
## iter 100 value 4966.987226
## final  value 4966.987226 
## stopped after 100 iterations
## # weights:  331
## initial  value 9822.068219 
## iter  10 value 5691.734051
## iter  20 value 5639.033781
## iter  30 value 5626.230582
## iter  40 value 5626.097934
## iter  50 value 5618.610550
## iter  60 value 5614.315605
## iter  70 value 5493.088629
## iter  80 value 5435.521515
## iter  90 value 5420.314009
## iter 100 value 5414.200725
## final  value 5414.200725 
## stopped after 100 iterations
## # weights:  551
## initial  value 6369.122030 
## iter  10 value 5674.731060
## iter  20 value 5597.650283
## iter  30 value 5503.012658
## iter  40 value 5471.541590
## iter  50 value 5446.189193
## iter  60 value 5422.698886
## iter  70 value 5414.955533
## iter  80 value 5409.760974
## iter  90 value 5406.034079
## iter 100 value 5386.793622
## final  value 5386.793622 
## stopped after 100 iterations
## # weights:  771
## initial  value 6625.754248 
## iter  10 value 5959.277333
## iter  20 value 5947.357128
## iter  30 value 5638.193742
## iter  40 value 5619.010973
## iter  50 value 5588.253177
## iter  60 value 5566.801060
## iter  70 value 5505.422533
## iter  80 value 5470.497425
## iter  90 value 5427.103308
## iter 100 value 5397.914398
## final  value 5397.914398 
## stopped after 100 iterations
## # weights:  221
## initial  value 6221.898355 
## iter  10 value 5948.613396
## iter  20 value 5632.550362
## iter  30 value 5632.469518
## iter  40 value 5632.320691
## iter  50 value 5632.213129
## iter  60 value 5630.055002
## iter  70 value 5624.918082
## iter  80 value 5624.863679
## final  value 5624.863478 
## converged
## # weights:  331
## initial  value 6990.937798 
## iter  10 value 5804.076383
## iter  20 value 5632.940775
## iter  30 value 5627.105931
## iter  40 value 5624.760299
## final  value 5624.693708 
## converged
## # weights:  551
## initial  value 6474.331245 
## iter  10 value 5956.137700
## iter  20 value 5803.072780
## iter  30 value 5638.391208
## iter  40 value 5621.514774
## iter  50 value 5608.477175
## iter  60 value 5538.997001
## iter  70 value 5522.673354
## iter  80 value 5521.079645
## iter  90 value 5483.089790
## iter 100 value 5465.016574
## final  value 5465.016574 
## stopped after 100 iterations
## # weights:  771
## initial  value 6138.362418 
## iter  10 value 5911.694695
## iter  20 value 5628.580252
## iter  30 value 5619.693358
## iter  40 value 5505.470256
## iter  50 value 5451.128658
## iter  60 value 5448.696480
## iter  70 value 5446.780985
## iter  80 value 5440.907180
## iter  90 value 5419.116285
## iter 100 value 5406.536031
## final  value 5406.536031 
## stopped after 100 iterations
## # weights:  221
## initial  value 7610.209014 
## iter  10 value 5968.220595
## final  value 5968.220076 
## converged
## # weights:  331
## initial  value 6548.381792 
## iter  10 value 5845.150439
## iter  20 value 5638.747057
## iter  30 value 5636.173762
## iter  40 value 5629.936031
## iter  50 value 5463.446678
## iter  60 value 5453.469623
## iter  70 value 5441.200910
## iter  80 value 5420.306589
## iter  90 value 5411.196803
## iter 100 value 5392.098701
## final  value 5392.098701 
## stopped after 100 iterations
## # weights:  551
## initial  value 6027.309520 
## iter  10 value 5756.350278
## iter  20 value 5726.352466
## iter  30 value 5637.089391
## iter  40 value 5624.366500
## iter  50 value 5618.492974
## iter  60 value 5615.065191
## iter  70 value 5590.170613
## iter  80 value 5495.265814
## iter  90 value 5454.032391
## iter 100 value 5440.840438
## final  value 5440.840438 
## stopped after 100 iterations
## # weights:  771
## initial  value 5873.644743 
## iter  10 value 5729.978150
## iter  20 value 5667.010678
## iter  30 value 5632.796459
## iter  40 value 5625.385661
## iter  50 value 5618.687273
## iter  60 value 5609.563928
## iter  70 value 5605.323436
## iter  80 value 5603.329805
## iter  90 value 5600.719730
## iter 100 value 5595.158221
## final  value 5595.158221 
## stopped after 100 iterations
## # weights:  221
## initial  value 8691.966473 
## iter  10 value 5974.213633
## iter  20 value 5968.590062
## iter  30 value 5955.819590
## iter  40 value 5927.948423
## iter  50 value 5871.144425
## iter  60 value 5641.776286
## iter  70 value 5634.417847
## iter  80 value 5627.397930
## iter  90 value 5606.597580
## iter 100 value 5567.276285
## final  value 5567.276285 
## stopped after 100 iterations
## # weights:  331
## initial  value 7843.424471 
## iter  10 value 5783.996502
## iter  20 value 5757.785028
## iter  30 value 5653.971440
## iter  40 value 5622.346296
## iter  50 value 5580.269421
## iter  60 value 5530.667997
## iter  70 value 5474.085177
## iter  80 value 5466.030870
## iter  90 value 5451.715745
## iter 100 value 5428.636545
## final  value 5428.636545 
## stopped after 100 iterations
## # weights:  551
## initial  value 6564.611784 
## iter  10 value 5675.398252
## iter  20 value 5666.110152
## iter  30 value 5633.004713
## iter  40 value 5631.046225
## iter  50 value 5630.970996
## final  value 5630.880497 
## converged
## # weights:  771
## initial  value 6022.606048 
## iter  10 value 5916.629676
## iter  20 value 5633.097933
## iter  30 value 5565.838806
## iter  40 value 5470.896167
## iter  50 value 5402.817826
## iter  60 value 5346.790532
## iter  70 value 5286.162544
## iter  80 value 5228.661826
## iter  90 value 5051.226675
## iter 100 value 5020.416558
## final  value 5020.416558 
## stopped after 100 iterations
## # weights:  221
## initial  value 6654.266264 
## iter  10 value 5678.136076
## iter  20 value 5599.154788
## iter  30 value 5496.907527
## iter  40 value 5366.121848
## iter  50 value 5168.405804
## iter  60 value 5044.237213
## iter  70 value 4963.531901
## iter  80 value 4917.767250
## iter  90 value 4907.046156
## iter 100 value 4904.876208
## final  value 4904.876208 
## stopped after 100 iterations
## # weights:  331
## initial  value 7008.732903 
## iter  10 value 5944.072750
## iter  20 value 5898.770705
## iter  30 value 5619.574242
## iter  40 value 5517.730747
## iter  50 value 5492.725748
## iter  60 value 5451.921020
## iter  70 value 5399.450357
## iter  80 value 5297.230861
## iter  90 value 5286.166898
## iter 100 value 5018.336700
## final  value 5018.336700 
## stopped after 100 iterations
## # weights:  551
## initial  value 6201.212600 
## iter  10 value 5876.233384
## iter  20 value 5851.300905
## iter  30 value 5829.512545
## iter  40 value 5812.909622
## iter  50 value 5797.217905
## iter  60 value 5608.915380
## iter  70 value 5592.520752
## iter  80 value 5528.180643
## iter  90 value 5506.520955
## iter 100 value 5467.342440
## final  value 5467.342440 
## stopped after 100 iterations
## # weights:  771
## initial  value 8065.857984 
## iter  10 value 5939.058154
## iter  20 value 5720.843351
## iter  30 value 5660.216350
## iter  40 value 5612.246543
## iter  50 value 5477.873322
## iter  60 value 5276.274438
## iter  70 value 5083.850700
## iter  80 value 4972.950815
## iter  90 value 4958.500144
## iter 100 value 4936.394109
## final  value 4936.394109 
## stopped after 100 iterations
## # weights:  221
## initial  value 6950.067207 
## iter  10 value 5962.894980
## iter  20 value 5755.706187
## iter  30 value 5633.369813
## iter  40 value 5583.349387
## iter  50 value 5495.373832
## iter  60 value 5190.031850
## iter  70 value 4988.992786
## iter  80 value 4910.609026
## iter  90 value 4890.584909
## iter 100 value 4862.887154
## final  value 4862.887154 
## stopped after 100 iterations
## # weights:  331
## initial  value 6690.899890 
## iter  10 value 5969.821668
## iter  20 value 5742.348079
## iter  30 value 5722.514608
## iter  40 value 5716.129776
## iter  50 value 5595.145932
## iter  60 value 5559.459363
## iter  70 value 5536.427363
## iter  80 value 5325.604897
## iter  90 value 5263.336592
## iter 100 value 5207.810575
## final  value 5207.810575 
## stopped after 100 iterations
## # weights:  551
## initial  value 6096.903000 
## iter  10 value 5920.598322
## iter  20 value 5846.243371
## iter  30 value 5840.898056
## iter  40 value 5661.747264
## iter  50 value 5555.054417
## iter  60 value 5464.375650
## iter  70 value 5454.411016
## iter  80 value 5377.527365
## iter  90 value 5255.203771
## iter 100 value 5123.974506
## final  value 5123.974506 
## stopped after 100 iterations
## # weights:  771
## initial  value 8409.407184 
## iter  10 value 5718.947784
## iter  20 value 5634.824295
## iter  30 value 5632.173545
## iter  40 value 5607.226599
## iter  50 value 5586.977947
## iter  60 value 5501.854444
## iter  70 value 5480.435299
## iter  80 value 5465.007973
## iter  90 value 5447.756587
## iter 100 value 5415.456344
## final  value 5415.456344 
## stopped after 100 iterations
## # weights:  221
## initial  value 6165.208528 
## iter  10 value 5972.209202
## iter  20 value 5969.924671
## iter  30 value 5969.897823
## final  value 5969.897562 
## converged
## # weights:  331
## initial  value 6549.820422 
## iter  10 value 5890.338872
## iter  20 value 5645.157301
## iter  30 value 5634.287349
## iter  40 value 5607.457017
## iter  50 value 5567.383552
## iter  60 value 5518.423522
## iter  70 value 5391.740372
## iter  80 value 5199.419277
## iter  90 value 4976.569854
## iter 100 value 4870.380987
## final  value 4870.380987 
## stopped after 100 iterations
## # weights:  551
## initial  value 7330.249099 
## iter  10 value 5892.129820
## iter  20 value 5533.595020
## iter  30 value 5501.608586
## iter  40 value 5464.137084
## iter  50 value 5445.127185
## iter  60 value 5418.026048
## iter  70 value 5399.933197
## iter  80 value 5307.195977
## iter  90 value 5263.271005
## iter 100 value 5086.796314
## final  value 5086.796314 
## stopped after 100 iterations
## # weights:  771
## initial  value 12686.400280 
## iter  10 value 6034.620541
## iter  20 value 5916.354522
## iter  30 value 5661.976587
## iter  30 value 5661.976539
## final  value 5661.976539 
## converged
## # weights:  221
## initial  value 7450.176285 
## iter  10 value 5801.253272
## iter  20 value 5648.159964
## iter  30 value 5499.611931
## iter  40 value 5164.368211
## iter  50 value 4973.680224
## iter  60 value 4929.933992
## iter  70 value 4835.669513
## iter  80 value 4801.836057
## iter  90 value 4797.302964
## iter 100 value 4788.286016
## final  value 4788.286016 
## stopped after 100 iterations
## # weights:  331
## initial  value 6080.134781 
## iter  10 value 5959.236308
## iter  20 value 5788.947435
## iter  30 value 5628.053331
## iter  40 value 5624.691122
## iter  50 value 5578.642864
## iter  60 value 5495.122259
## iter  70 value 5462.645550
## iter  80 value 5457.193310
## iter  90 value 5443.629546
## iter 100 value 5420.272463
## final  value 5420.272463 
## stopped after 100 iterations
## # weights:  551
## initial  value 7671.379250 
## iter  10 value 5948.994083
## iter  20 value 5591.363783
## iter  30 value 5576.531125
## iter  40 value 5569.431481
## iter  50 value 5487.465457
## iter  60 value 5454.462363
## iter  70 value 5417.862955
## iter  80 value 5412.925919
## iter  90 value 5383.293368
## iter 100 value 5335.996121
## final  value 5335.996121 
## stopped after 100 iterations
## # weights:  771
## initial  value 7316.881523 
## iter  10 value 5942.311980
## iter  20 value 5641.847721
## iter  30 value 5605.997944
## iter  40 value 5595.471814
## iter  50 value 5501.505453
## iter  60 value 5460.656838
## iter  70 value 5433.652739
## iter  80 value 5424.730884
## iter  90 value 5422.933560
## iter 100 value 5390.531166
## final  value 5390.531166 
## stopped after 100 iterations
## # weights:  331
## initial  value 11291.681310 
## iter  10 value 8856.091618
## iter  20 value 8837.702052
## iter  30 value 8825.006643
## iter  40 value 8817.134785
## iter  50 value 8813.512309
## iter  60 value 8505.906210
## iter  70 value 8474.726896
## iter  80 value 8320.565169
## iter  90 value 8197.618345
## iter 100 value 7989.611255
## final  value 7989.611255 
## stopped after 100 iterations
# Print the fitted caret object: resampled accuracy and kappa for each
# size/decay combination, and the combination selected for the final model.
Adult_TDA_PC_5.60.5_n3_NN1Fit0
## Neural Network 
## 
## 15744 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 10496, 10495, 10497 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa    
##   2     0.3    0.7679138  0.1700041
##   2     0.5    0.7675284  0.1765438
##   2     0.7    0.7799795  0.2429999
##   3     0.3    0.7802334  0.2374957
##   3     0.5    0.7816310  0.2406777
##   3     0.7    0.7784553  0.2129553
##   5     0.3    0.7811863  0.2456170
##   5     0.5    0.7763594  0.2310306
##   5     0.7    0.7764234  0.2005918
##   7     0.3    0.7764233  0.1971602
##   7     0.5    0.7740087  0.2276808
##   7     0.7    0.7813768  0.2535427
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 3 and decay = 0.5.
# Per-fold accuracy and kappa of the selected model.
Adult_TDA_PC_5.60.5_n3_NN1Fit0[["resample"]]
##    Accuracy     Kappa Resample
## 1 0.7821612 0.2978058    Fold3
## 2 0.7835778 0.2233166    Fold2
## 3 0.7791540 0.2009106    Fold1
# Store the per-fold accuracy column as a one-column data frame. It is selected
# by name rather than by position, so the extraction does not depend on the
# column order of `resample`.
ad_tda_pc_5.60.5_n3_nn1_fit_re <-
  Adult_TDA_PC_5.60.5_n3_NN1Fit0$resample["Accuracy"]

# Print the final network's structure and fitted weights.
summary(Adult_TDA_PC_5.60.5_n3_NN1Fit0)
## a 108-3-1 network with 331 weights
## options were - entropy fitting  decay=0.5
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.00     0.00     0.00     0.17     0.00     0.00     0.00     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##    -0.01     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##     0.02     2.20     0.00     0.11     0.08     0.00    -0.05     0.00 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##    -0.12     0.00     0.00    -0.50     0.00     0.00     0.00     0.00 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.00     0.07     0.00     0.00    -0.17     0.02     0.00     0.00 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##     0.00     0.00     0.00     0.10    -0.34    -0.01     0.00     0.00 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.00     0.02     0.00     0.00     0.00     0.18     0.00     0.00 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##     0.00    -0.12     0.00    -0.02     0.04     0.00    -0.01    -0.05 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     0.00     0.00     0.00     0.01     0.01     0.00     0.00     0.00 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##     0.00     0.00     0.00    -0.05     0.00     0.06     0.00     0.01 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     6.03     0.00    -0.91     0.00     0.00     0.00     0.00     0.00 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.00     0.04    -0.05     0.00     0.00     0.00     0.00     0.00 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.00     0.00     0.03     0.00     0.00 
##    b->h3   i1->h3   i2->h3   i3->h3   i4->h3   i5->h3   i6->h3   i7->h3 
##    -2.39     0.01     0.42     0.28    -0.53     0.00     0.09    -0.75 
##   i8->h3   i9->h3  i10->h3  i11->h3  i12->h3  i13->h3  i14->h3  i15->h3 
##    -1.72     0.05    -0.22     0.00    -0.51     0.23    -0.65    -0.83 
##  i16->h3  i17->h3  i18->h3  i19->h3  i20->h3  i21->h3  i22->h3  i23->h3 
##    -0.23    -0.55    -0.68     0.50    -0.56     0.12     1.05    -0.10 
##  i24->h3  i25->h3  i26->h3  i27->h3  i28->h3  i29->h3  i30->h3  i31->h3 
##    -0.20    -0.15    -0.28     0.46    -0.01    -0.25    -0.85     0.42 
##  i32->h3  i33->h3  i34->h3  i35->h3  i36->h3  i37->h3  i38->h3  i39->h3 
##    -0.29    -0.04    -1.64     0.26     0.42     1.03     0.10     0.77 
##  i40->h3  i41->h3  i42->h3  i43->h3  i44->h3  i45->h3  i46->h3  i47->h3 
##     1.02     0.22     0.19     0.71    -0.36    -0.03     0.59    -0.86 
##  i48->h3  i49->h3  i50->h3  i51->h3  i52->h3  i53->h3  i54->h3  i55->h3 
##     1.08    -6.48    -0.78    -0.34    -0.64    -0.47    -0.02    -0.21 
##  i56->h3  i57->h3  i58->h3  i59->h3  i60->h3  i61->h3  i62->h3  i63->h3 
##    -0.70    -0.87    -1.22     0.17    -0.54     0.07    -0.27    -2.11 
##  i64->h3  i65->h3  i66->h3  i67->h3  i68->h3  i69->h3  i70->h3  i71->h3 
##     0.00     0.00     0.01     0.27    -0.10    -0.10    -0.99    -0.36 
##  i72->h3  i73->h3  i74->h3  i75->h3  i76->h3  i77->h3  i78->h3  i79->h3 
##     0.41    -0.12     0.21     0.26     0.47     1.02    -1.06    -0.88 
##  i80->h3  i81->h3  i82->h3  i83->h3  i84->h3  i85->h3  i86->h3  i87->h3 
##    -1.38    -0.96     0.00     0.07     0.00    -0.26    -0.67     0.63 
##  i88->h3  i89->h3  i90->h3  i91->h3  i92->h3  i93->h3  i94->h3  i95->h3 
##     0.71     0.22     0.06     0.40     0.07     0.05    -0.03    -0.14 
##  i96->h3  i97->h3  i98->h3  i99->h3 i100->h3 i101->h3 i102->h3 i103->h3 
##    -0.03     1.53     0.55    -0.14     0.03     0.45    -1.29    -0.20 
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3 
##    -0.07     0.21    -0.02    -1.10    -0.13 
##  b->o h1->o h2->o h3->o 
## -0.50 -0.89  0.17  5.10
# Variable-importance plot for the n3 TDA-assisted neural network, limited to
# 25 features. The title typo ("Assited") is corrected and the feature count
# is passed by name instead of by position.
vip(Adult_TDA_PC_5.60.5_n3_NN1Fit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.60.5_n3_NN1Fit TDA-Assisted NN")

# Score the test data with the n3 TDA-assisted network fitted on the
# training data.
pred0 <- predict(
  Adult_TDA_PC_5.60.5_n3_NN1Fit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the observed test labels, then print
# the confusion matrix together with its summary statistics.
ad_tda_pc_5.60.5_n3_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n3_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6729  1815
##      >50K     687   537
##                                           
##                Accuracy : 0.7439          
##                  95% CI : (0.7351, 0.7525)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.9998          
##                                           
##                   Kappa : 0.1622          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.9074          
##             Specificity : 0.2283          
##          Pos Pred Value : 0.7876          
##          Neg Pred Value : 0.4387          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6889          
##    Detection Prevalence : 0.8747          
##       Balanced Accuracy : 0.5678          
##                                           
##        'Positive' Class :  <=50K          
## 
# Print the test-set confusion matrix a second time (same object as printed above).
ad_tda_pc_5.60.5_n3_nn1_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6729  1815
##      >50K     687   537
##                                           
##                Accuracy : 0.7439          
##                  95% CI : (0.7351, 0.7525)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.9998          
##                                           
##                   Kappa : 0.1622          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.9074          
##             Specificity : 0.2283          
##          Pos Pred Value : 0.7876          
##          Neg Pred Value : 0.4387          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6889          
##    Detection Prevalence : 0.8747          
##       Balanced Accuracy : 0.5678          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set metrics (accuracy, kappa, accuracy bounds, p-values).
ad_tda_pc_5.60.5_n3_nn1_cf0[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.438575e-01   1.622455e-01   7.350781e-01   7.524912e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   9.997987e-01  2.062967e-112
# Keep the overall test accuracy (the first element of `overall`) so it can be
# compared with the other model's test accuracy later.
ad_tda_pc_5.60.5_n3_nn1_cf0_ov_acc <-
  ad_tda_pc_5.60.5_n3_nn1_cf0[["overall"]]["Accuracy"]

# Per-class metrics, reported for the 'positive' class.
ad_tda_pc_5.60.5_n3_nn1_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9073625            0.2283163            0.7875702 
##       Neg Pred Value            Precision               Recall 
##            0.4387255            0.7875702            0.9073625 
##                   F1           Prevalence       Detection Rate 
##            0.8432331            0.7592138            0.6888821 
## Detection Prevalence    Balanced Accuracy 
##            0.8746929            0.5678394
# Keep precision, recall and F1 (elements 5 to 7 of `byClass`).
ad_tda_pc_5.60.5_n3_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n3_nn1_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-TDA-assisted NN vs. TDA-assisted NN classifiers

### 3-fold diff

# Row-wise difference between the two sets of cross-validated accuracies:
# ad_nn1_fit_re minus the n3 TDA-assisted NN resamples.
diff_tda_pca_5.60.5_nn1_n3_3_fold <-
  ad_nn1_fit_re - ad_tda_pc_5.60.5_n3_nn1_fit_re
diff_tda_pca_5.60.5_nn1_n3_3_fold
##     Accuracy
## 1 0.02578927
## 2 0.06651431
## 3 0.02224114
## Bayesian Tests 3-fold diff

# Bayesian sign test on the fold-wise differences.
# The trailing -0.01 / 0.01 are presumably the ROPE bounds -- confirm against
# the BayesianSignTest helper.
bst_tda_pca_5.60.5_nn1.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_nn1_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_nn1.n3_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Ratio of the sign test's left probability to its right probability.
bst_tda_pca_5.60.5_nn1.n3_3_fold_odds.left <-
  bst_tda_pca_5.60.5_nn1.n3_3_fold[["probLeft"]] /
  bst_tda_pca_5.60.5_nn1.n3_3_fold[["probRight"]]
bst_tda_pca_5.60.5_nn1.n3_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the same fold-wise differences, called with the
# same -0.01 / 0.01 bounds as the sign test.
bsr_tda_pca_5.60.5_nn1.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nn1_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_nn1.n3_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.008233333
## 
## $winRight
## [1] 0.9917667
# Bayesian correlated t-test on the fold-wise differences.
# NOTE(review): the second argument (0.1) looks like the fold correlation,
# which is conventionally 1 / k for k-fold CV; these differences come from
# 3 folds, so confirm that 0.1 is intended.
bct_tda_pca_5.60.5_nn1.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_nn1_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_nn1.n3_3_fold
## $left
## [1] 0.04948247
## 
## $rope
## [1] 0.06445571
## 
## $right
## [1] 0.8860618
# ROPE plot of the 3-fold differences over the range [-0.01, 0.01]
plot(
  rope(
    diff_tda_pca_5.60.5_nn1_n3_3_fold,
    range = c(-0.01, 0.01)
  )
)

# BayesFactor t-test (left disabled)
#bf_tda_pca_5.60.5_nn1.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nn1_n3_3_fold))
#bf_tda_pca_5.60.5_nn1.n3_3_fold

# One-sample t-test of the mean fold-wise difference against 0
t.test(as.matrix(diff_tda_pca_5.60.5_nn1_n3_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.60.5_nn1_n3_3_fold)
## t = 2.6882, df = 2, p-value = 0.115
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.02293049  0.09929364
## sample estimates:
##  mean of x 
## 0.03818157
### Test set diff

# Difference in test-set accuracy: nn1_cf_ov_acc minus the n3 TDA-assisted NN
# accuracy. This is a single value, not one value per fold.
diff_tda_pca_5.60.5_nn1.n3_test <-
  nn1_cf_ov_acc - ad_tda_pc_5.60.5_n3_nn1_cf0_ov_acc
diff_tda_pca_5.60.5_nn1.n3_test
##   Accuracy 
## 0.06122031
## Bayesian Tests Test set diff

# Bayesian sign test on the single test-set accuracy difference, called with
# the same -0.01 / 0.01 bounds used for the 3-fold differences.
bst_tda_pca_5.60.5_nn1.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_nn1.n3_test),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_nn1.n3_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Ratio of the sign test's left probability to its right probability
# (test-set difference).
bst_tda_pca_5.60.5_nn1.n3_test_odds.left <-
  bst_tda_pca_5.60.5_nn1.n3_test[["probLeft"]] /
  bst_tda_pca_5.60.5_nn1.n3_test[["probRight"]]
bst_tda_pca_5.60.5_nn1.n3_test_odds.left
## [1] 0
# Bayesian signed-rank test on the single test-set accuracy difference.
bsr_tda_pca_5.60.5_nn1.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nn1.n3_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_nn1.n3_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1579333
## 
## $winRight
## [1] 0.8420667
# Bayesian correlated t-test on the single test-set accuracy difference.
# NOTE(review): the printed result is NA for left, rope and right --
# presumably because one difference has no sample variance; confirm whether
# this call should be kept for the test-set comparison.
bct_tda_pca_5.60.5_nn1.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_nn1.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_nn1.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_nn1.n3_test)))

#BayesFactor
#bf_tda_pca_5.60.5_nn1.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nn1.n3_test)) #bf_tda_pca_5.60.5_nn1.n3_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_nn1.n3_test))


## Node 4

# Neural Network 1: fit caret's 'nnet' model to the node-4 TDA data, tuning
# over nn1Grid with the resampling scheme in fitControl and selecting the
# final model by accuracy.
# NOTE(review): `Importance` is not a documented train() argument, so it is
# presumably forwarded through `...` to the underlying nnet fit -- confirm it
# has any effect.
Adult_TDA_PC_5.60.5_n4_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n4.vec,
  # TRUE instead of T: `T` is an ordinary variable that can be reassigned.
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 8318.339118 
## iter  10 value 3175.581436
## iter  20 value 3175.360580
## iter  30 value 3174.429923
## iter  40 value 3171.479766
## iter  50 value 3056.906775
## iter  60 value 2934.500898
## iter  70 value 2931.848123
## iter  80 value 2857.138368
## iter  90 value 2830.855223
## iter 100 value 2830.370222
## final  value 2830.370222 
## stopped after 100 iterations
## # weights:  331
## initial  value 15390.601350 
## iter  10 value 3174.760310
## iter  10 value 3174.760310
## iter  20 value 3170.884067
## iter  30 value 3052.498543
## iter  40 value 2841.762584
## iter  50 value 2794.548792
## iter  60 value 2715.456353
## iter  70 value 2296.174422
## iter  80 value 2198.156499
## iter  90 value 2144.129432
## iter 100 value 2121.102740
## final  value 2121.102740 
## stopped after 100 iterations
## # weights:  551
## initial  value 9658.704641 
## iter  10 value 3176.535145
## iter  20 value 2918.635260
## iter  30 value 2839.893924
## iter  40 value 2803.776171
## iter  50 value 2750.467655
## iter  60 value 2641.970727
## iter  70 value 2521.974159
## iter  80 value 2417.398638
## iter  90 value 2399.528725
## iter 100 value 2363.327757
## final  value 2363.327757 
## stopped after 100 iterations
## # weights:  771
## initial  value 12242.422298 
## iter  10 value 3043.960771
## iter  20 value 2852.756756
## iter  30 value 2833.307137
## iter  40 value 2788.859258
## iter  50 value 2702.237556
## iter  60 value 2658.085353
## iter  70 value 2483.361322
## iter  80 value 2251.760553
## iter  90 value 2177.425476
## iter 100 value 2081.021450
## final  value 2081.021450 
## stopped after 100 iterations
## # weights:  221
## initial  value 15105.373403 
## iter  10 value 2895.099002
## iter  20 value 2689.810873
## iter  30 value 2494.716724
## iter  40 value 2304.040442
## iter  50 value 2255.921195
## iter  60 value 2175.513796
## iter  70 value 2133.816325
## iter  80 value 2084.598545
## iter  90 value 2078.448811
## iter 100 value 2077.113637
## final  value 2077.113637 
## stopped after 100 iterations
## # weights:  331
## initial  value 9296.093651 
## iter  10 value 3167.254065
## iter  20 value 2540.379769
## iter  30 value 2369.209402
## iter  40 value 2276.078337
## iter  50 value 2230.444370
## iter  60 value 2219.921975
## iter  70 value 2219.799235
## iter  80 value 2217.108119
## iter  90 value 2190.455378
## iter 100 value 2173.735733
## final  value 2173.735733 
## stopped after 100 iterations
## # weights:  551
## initial  value 16151.221545 
## iter  10 value 3178.016338
## iter  20 value 3169.402100
## iter  30 value 2935.554822
## iter  40 value 2912.830716
## iter  50 value 2857.119881
## iter  60 value 2810.852484
## iter  70 value 2800.294422
## iter  80 value 2797.664262
## iter  90 value 2785.745682
## iter 100 value 2781.891518
## final  value 2781.891518 
## stopped after 100 iterations
## # weights:  771
## initial  value 17160.026245 
## iter  10 value 3161.407550
## iter  20 value 2841.917904
## iter  30 value 2826.289848
## iter  40 value 2825.701657
## iter  50 value 2818.239924
## iter  60 value 2800.902493
## iter  70 value 2763.108985
## iter  80 value 2742.545434
## iter  90 value 2734.106569
## iter 100 value 2733.009686
## final  value 2733.009686 
## stopped after 100 iterations
## # weights:  221
## initial  value 6017.847720 
## iter  10 value 3178.123962
## iter  20 value 2958.530357
## iter  30 value 2913.292009
## iter  40 value 2881.405020
## iter  50 value 2775.653759
## iter  60 value 2719.126217
## iter  70 value 2533.008117
## iter  80 value 2345.463392
## iter  90 value 2277.314579
## iter 100 value 2176.085326
## final  value 2176.085326 
## stopped after 100 iterations
## # weights:  331
## initial  value 12784.462090 
## iter  10 value 3179.418885
## iter  20 value 3176.307362
## iter  30 value 3155.161516
## iter  40 value 3049.985344
## iter  50 value 2872.314473
## iter  60 value 2858.110191
## iter  70 value 2851.779222
## iter  80 value 2833.410158
## iter  90 value 2798.397836
## iter 100 value 2768.991243
## final  value 2768.991243 
## stopped after 100 iterations
## # weights:  551
## initial  value 10000.285372 
## iter  10 value 3189.986250
## iter  20 value 3179.762001
## iter  30 value 3178.584500
## iter  40 value 3171.521727
## iter  50 value 2973.688849
## iter  60 value 2883.847192
## iter  70 value 2731.243394
## iter  80 value 2708.232404
## iter  90 value 2676.948190
## iter 100 value 2601.783663
## final  value 2601.783663 
## stopped after 100 iterations
## # weights:  771
## initial  value 7840.135723 
## iter  10 value 3178.812950
## iter  20 value 3176.865202
## iter  30 value 3140.193649
## iter  40 value 2866.297815
## iter  50 value 2509.877301
## iter  60 value 2483.573391
## iter  70 value 2449.888344
## iter  80 value 2334.564148
## iter  90 value 2185.115722
## iter 100 value 2125.047485
## final  value 2125.047485 
## stopped after 100 iterations
## # weights:  221
## initial  value 8105.949359 
## iter  10 value 3235.228114
## iter  20 value 3179.968775
## iter  30 value 3069.350213
## iter  40 value 2870.088133
## iter  50 value 2797.936901
## iter  60 value 2784.644070
## iter  70 value 2783.233884
## iter  80 value 2777.014607
## iter  90 value 2738.231626
## iter 100 value 2660.087816
## final  value 2660.087816 
## stopped after 100 iterations
## # weights:  331
## initial  value 7502.482838 
## iter  10 value 3089.435871
## iter  20 value 3002.803427
## iter  30 value 2887.575634
## iter  40 value 2883.128806
## iter  50 value 2882.363134
## final  value 2880.314962 
## converged
## # weights:  551
## initial  value 11823.398416 
## iter  10 value 3212.127255
## iter  20 value 3198.743240
## iter  30 value 3019.173431
## iter  40 value 2936.624044
## iter  50 value 2832.306624
## iter  60 value 2820.085942
## iter  70 value 2811.087432
## iter  80 value 2798.797901
## iter  90 value 2769.774000
## iter 100 value 2607.251160
## final  value 2607.251160 
## stopped after 100 iterations
## # weights:  771
## initial  value 5070.332763 
## iter  10 value 2975.954881
## iter  20 value 2908.688311
## iter  30 value 2835.763453
## iter  40 value 2796.195464
## iter  50 value 2780.114996
## iter  60 value 2778.225304
## iter  70 value 2769.331775
## iter  80 value 2751.269880
## iter  90 value 2647.939916
## iter 100 value 2323.116948
## final  value 2323.116948 
## stopped after 100 iterations
## # weights:  221
## initial  value 8356.841771 
## iter  10 value 3180.975967
## iter  20 value 3180.421445
## iter  30 value 3177.244877
## iter  40 value 3026.800370
## iter  50 value 2906.900367
## iter  60 value 2895.611903
## iter  70 value 2882.824709
## iter  80 value 2882.041906
## iter  90 value 2879.461717
## iter 100 value 2870.999373
## final  value 2870.999373 
## stopped after 100 iterations
## # weights:  331
## initial  value 12457.404844 
## final  value 3177.249126 
## converged
## # weights:  551
## initial  value 14689.373225 
## iter  10 value 3185.663849
## iter  20 value 3156.742596
## iter  30 value 3155.369796
## iter  40 value 3152.555535
## iter  50 value 3141.118838
## iter  60 value 2908.799060
## iter  70 value 2757.910407
## iter  80 value 2553.488276
## iter  90 value 2370.294970
## iter 100 value 2331.520141
## final  value 2331.520141 
## stopped after 100 iterations
## # weights:  771
## initial  value 8829.016112 
## iter  10 value 3184.534095
## iter  20 value 3176.916576
## iter  30 value 3001.258611
## iter  40 value 2932.607976
## iter  50 value 2924.030669
## iter  60 value 2906.695369
## iter  70 value 2903.722815
## iter  80 value 2899.072467
## iter  90 value 2894.338672
## iter 100 value 2861.181597
## final  value 2861.181597 
## stopped after 100 iterations
## # weights:  221
## initial  value 7228.136496 
## iter  10 value 3083.664829
## iter  20 value 2905.775811
## iter  30 value 2903.209710
## iter  40 value 2901.545066
## iter  50 value 2863.284556
## iter  60 value 2812.497219
## iter  70 value 2796.960244
## iter  80 value 2766.297980
## iter  90 value 2718.071421
## iter 100 value 2529.017575
## final  value 2529.017575 
## stopped after 100 iterations
## # weights:  331
## initial  value 10313.968089 
## iter  10 value 3178.245814
## iter  20 value 3177.728970
## iter  30 value 3031.266190
## iter  40 value 2937.636917
## iter  50 value 2916.811479
## iter  60 value 2911.477572
## iter  70 value 2884.894518
## iter  80 value 2878.030742
## iter  90 value 2867.386363
## iter 100 value 2850.511592
## final  value 2850.511592 
## stopped after 100 iterations
## # weights:  551
## initial  value 7843.657469 
## iter  10 value 3242.403780
## iter  20 value 2950.731700
## iter  30 value 2878.574797
## iter  40 value 2840.009417
## iter  50 value 2824.447684
## iter  60 value 2802.561308
## iter  70 value 2767.344175
## iter  80 value 2744.200776
## iter  90 value 2688.569678
## iter 100 value 2637.296858
## final  value 2637.296858 
## stopped after 100 iterations
## # weights:  771
## initial  value 9633.952674 
## iter  10 value 3122.570308
## iter  20 value 2901.060055
## iter  30 value 2821.926566
## iter  40 value 2793.341837
## iter  50 value 2778.682667
## iter  60 value 2771.594887
## iter  70 value 2769.762496
## iter  70 value 2769.762473
## iter  80 value 2763.547982
## iter  90 value 2747.044167
## iter 100 value 2744.803651
## final  value 2744.803651 
## stopped after 100 iterations
## # weights:  221
## initial  value 5051.137548 
## iter  10 value 3174.497014
## iter  20 value 3174.397090
## iter  30 value 3146.885113
## iter  40 value 2929.660496
## iter  50 value 2919.480578
## iter  60 value 2868.733792
## iter  70 value 2855.700818
## iter  80 value 2848.870322
## iter  90 value 2830.271425
## iter 100 value 2816.466603
## final  value 2816.466603 
## stopped after 100 iterations
## # weights:  331
## initial  value 5683.463752 
## iter  10 value 3171.628339
## iter  20 value 2933.787291
## iter  30 value 2919.087869
## iter  40 value 2879.387645
## iter  50 value 2864.848373
## iter  60 value 2817.770655
## iter  70 value 2668.270024
## iter  80 value 2493.519074
## iter  90 value 2352.533083
## iter 100 value 2332.743505
## final  value 2332.743505 
## stopped after 100 iterations
## # weights:  551
## initial  value 10375.938585 
## iter  10 value 3176.765068
## iter  20 value 3013.188793
## iter  30 value 2930.148624
## iter  40 value 2897.274538
## iter  50 value 2887.512488
## iter  60 value 2861.701235
## iter  70 value 2842.146603
## iter  80 value 2755.422422
## iter  90 value 2731.539588
## iter 100 value 2729.308090
## final  value 2729.308090 
## stopped after 100 iterations
## # weights:  771
## initial  value 13152.127600 
## iter  10 value 3175.181735
## iter  20 value 2956.604572
## iter  30 value 2888.759612
## iter  40 value 2858.578193
## iter  50 value 2803.187673
## iter  60 value 2773.853296
## iter  70 value 2764.001758
## iter  80 value 2716.635155
## iter  90 value 2701.596286
## iter 100 value 2671.911237
## final  value 2671.911237 
## stopped after 100 iterations
## # weights:  221
## initial  value 8288.788797 
## iter  10 value 2975.175537
## iter  20 value 2775.725368
## iter  30 value 2763.784332
## iter  40 value 2761.376335
## iter  50 value 2760.602432
## iter  60 value 2683.496462
## iter  70 value 2507.431424
## iter  80 value 2464.144798
## iter  90 value 2288.473856
## iter 100 value 2283.575832
## final  value 2283.575832 
## stopped after 100 iterations
## # weights:  331
## initial  value 9671.848902 
## iter  10 value 2859.688135
## iter  20 value 2504.905439
## iter  30 value 2262.597822
## iter  40 value 2165.589678
## iter  50 value 2081.862368
## iter  60 value 2042.704581
## iter  70 value 1991.618373
## iter  80 value 1967.476807
## iter  90 value 1950.975698
## iter 100 value 1938.953919
## final  value 1938.953919 
## stopped after 100 iterations
## # weights:  551
## initial  value 12638.603501 
## iter  10 value 3184.338278
## iter  20 value 3122.185013
## iter  30 value 2921.536165
## iter  40 value 2912.406274
## iter  50 value 2908.742837
## iter  60 value 2906.500470
## iter  70 value 2880.582691
## iter  80 value 2860.134232
## iter  90 value 2848.552410
## iter 100 value 2841.594289
## final  value 2841.594289 
## stopped after 100 iterations
## # weights:  771
## initial  value 8600.016886 
## iter  10 value 3181.684562
## iter  20 value 3174.019852
## iter  30 value 3083.813432
## iter  40 value 2931.820085
## iter  50 value 2906.693364
## iter  60 value 2899.600727
## iter  70 value 2893.957575
## iter  80 value 2888.429971
## iter  90 value 2844.705290
## iter 100 value 2814.751672
## final  value 2814.751672 
## stopped after 100 iterations
## # weights:  221
## initial  value 15192.729718 
## iter  10 value 3184.035105
## iter  20 value 3182.922596
## iter  30 value 3179.499739
## iter  40 value 2921.662689
## iter  50 value 2865.086428
## iter  60 value 2850.217446
## iter  70 value 2836.951134
## iter  80 value 2818.399194
## iter  90 value 2777.562984
## iter 100 value 2733.174646
## final  value 2733.174646 
## stopped after 100 iterations
## # weights:  331
## initial  value 17721.307928 
## iter  10 value 3525.287185
## iter  20 value 3030.598398
## iter  30 value 2917.254479
## iter  40 value 2911.360417
## iter  50 value 2906.024953
## iter  60 value 2829.333868
## iter  70 value 2499.236845
## iter  80 value 2258.051548
## iter  90 value 2196.426328
## iter 100 value 2144.648359
## final  value 2144.648359 
## stopped after 100 iterations
## # weights:  551
## initial  value 6791.879437 
## iter  10 value 3175.017748
## iter  20 value 3064.812839
## iter  30 value 2930.537692
## iter  40 value 2806.167919
## iter  50 value 2642.438606
## iter  60 value 2355.564654
## iter  70 value 2217.849553
## iter  80 value 2143.030954
## iter  90 value 2133.078034
## iter 100 value 2085.849348
## final  value 2085.849348 
## stopped after 100 iterations
## # weights:  771
## initial  value 15145.167293 
## iter  10 value 3063.253436
## iter  20 value 2895.358504
## iter  30 value 2862.551201
## iter  40 value 2815.809818
## iter  50 value 2758.755841
## iter  60 value 2741.044779
## iter  70 value 2722.810868
## iter  80 value 2714.959606
## iter  90 value 2706.036479
## iter 100 value 2559.798981
## final  value 2559.798981 
## stopped after 100 iterations
## # weights:  551
## initial  value 15761.835171 
## iter  10 value 4767.064662
## iter  20 value 4735.684140
## iter  30 value 4727.024070
## iter  40 value 4623.855125
## iter  50 value 4113.938595
## iter  60 value 3629.048390
## iter  70 value 3580.696235
## iter  80 value 3564.600301
## iter  90 value 3528.658234
## iter 100 value 3509.068680
## final  value 3509.068680 
## stopped after 100 iterations
# Print the fitted node-4 model: resampling results across the tuning grid and the selected size/decay.
Adult_TDA_PC_5.60.5_n4_NN1Fit0
## Neural Network 
## 
## 19829 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 13219, 13220, 13219 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa    
##   2     0.3    0.9435677  0.2827854
##   2     0.5    0.9426092  0.3250682
##   2     0.7    0.9419535  0.2972836
##   3     0.3    0.9414998  0.3058963
##   3     0.5    0.9407431  0.2240087
##   3     0.7    0.9413485  0.2886483
##   5     0.3    0.9455848  0.3153222
##   5     0.5    0.9430633  0.2625668
##   5     0.7    0.9421556  0.3202957
##   7     0.3    0.9448787  0.3188726
##   7     0.5    0.9433657  0.2950004
##   7     0.7    0.9440720  0.2998438
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 5 and decay = 0.3.
# Per-fold accuracy and kappa for the selected tuning parameters.
Adult_TDA_PC_5.60.5_n4_NN1Fit0[["resample"]]
##    Accuracy     Kappa Resample
## 1 0.9426626 0.3097635    Fold1
## 2 0.9473525 0.3290017    Fold3
## 3 0.9467393 0.3072013    Fold2
# Keep the per-fold accuracies as a one-column data frame (first column of
# the resample table).
# NOTE(review): the resample rows print as Fold1, Fold3, Fold2, and the n3
# section subtracts such data frames directly; confirm that the comparison
# model's rows are in the same fold order.
ad_tda_pc_5.60.5_n4_nn1_fit_re <-
  Adult_TDA_PC_5.60.5_n4_NN1Fit0[["resample"]]["Accuracy"]

# Network structure and fitted weights of the final model.
summary(Adult_TDA_PC_5.60.5_n4_NN1Fit0)
## a 108-5-1 network with 551 weights
## options were - entropy fitting  decay=0.3
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.00     0.00     0.00    -0.12     0.00     0.00     0.00     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.01     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##     5.60    -0.02     1.27    -0.93     3.15    -0.01     0.69    -0.04 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##    -0.70     2.06     0.11     0.00     2.03    -0.43     0.13     1.15 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.72     1.07     0.53     0.43     2.21    -1.04    -1.71     2.42 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##    -1.20     0.22    -1.20     0.28     0.07     0.35    -0.09    -0.59 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##    -0.16     2.99     2.69     0.41     1.26     1.99     0.17     4.74 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##    -1.78     2.48     1.36     2.39     2.13     0.61    -3.78    -2.73 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##    -1.23    -1.90    -0.12    11.45    -0.83     1.69     3.80     0.84 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##   -11.36     0.81     1.17     1.99     0.61     1.01     2.43     3.17 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.00    -0.01    -0.24     0.32     0.03     0.58     0.12    -0.05 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##     0.81     0.47    -0.51     0.14    -0.15     0.16     0.43     0.01 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##    -0.71     0.68     0.00     0.30     0.09    -0.01     0.61     0.32 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##    -0.46    -1.44     0.46    -0.14    -0.07     1.84     0.36     0.13 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.26     0.22     0.03     0.23     0.02     0.07     0.34     0.32 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##    -0.02     0.05    -0.56     0.22     0.10 
##    b->h3   i1->h3   i2->h3   i3->h3   i4->h3   i5->h3   i6->h3   i7->h3 
##     0.00    -0.05     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h3   i9->h3  i10->h3  i11->h3  i12->h3  i13->h3  i14->h3  i15->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h3  i17->h3  i18->h3  i19->h3  i20->h3  i21->h3  i22->h3  i23->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h3  i25->h3  i26->h3  i27->h3  i28->h3  i29->h3  i30->h3  i31->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h3  i33->h3  i34->h3  i35->h3  i36->h3  i37->h3  i38->h3  i39->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h3  i41->h3  i42->h3  i43->h3  i44->h3  i45->h3  i46->h3  i47->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h3  i49->h3  i50->h3  i51->h3  i52->h3  i53->h3  i54->h3  i55->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h3  i57->h3  i58->h3  i59->h3  i60->h3  i61->h3  i62->h3  i63->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h3  i65->h3  i66->h3  i67->h3  i68->h3  i69->h3  i70->h3  i71->h3 
##     0.01    -0.02    -0.03     0.00     0.00     0.00     0.00     0.00 
##  i72->h3  i73->h3  i74->h3  i75->h3  i76->h3  i77->h3  i78->h3  i79->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h3  i81->h3  i82->h3  i83->h3  i84->h3  i85->h3  i86->h3  i87->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h3  i89->h3  i90->h3  i91->h3  i92->h3  i93->h3  i94->h3  i95->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h3  i97->h3  i98->h3  i99->h3 i100->h3 i101->h3 i102->h3 i103->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h4   i1->h4   i2->h4   i3->h4   i4->h4   i5->h4   i6->h4   i7->h4 
##     1.68    -0.03    -0.18     0.02    -0.51     0.07     0.60     1.01 
##   i8->h4   i9->h4  i10->h4  i11->h4  i12->h4  i13->h4  i14->h4  i15->h4 
##     0.57    -0.04     0.15     0.00     0.63     1.09     0.69     0.16 
##  i16->h4  i17->h4  i18->h4  i19->h4  i20->h4  i21->h4  i22->h4  i23->h4 
##    -0.14     0.34    -0.31    -0.39    -0.09    -0.14    -1.81     0.34 
##  i24->h4  i25->h4  i26->h4  i27->h4  i28->h4  i29->h4  i30->h4  i31->h4 
##     0.01     0.17     0.94     0.20    -0.42     1.81    -0.26    -2.56 
##  i32->h4  i33->h4  i34->h4  i35->h4  i36->h4  i37->h4  i38->h4  i39->h4 
##     0.72     0.74     1.17     0.05    -0.11    -0.88     0.05    -3.26 
##  i40->h4  i41->h4  i42->h4  i43->h4  i44->h4  i45->h4  i46->h4  i47->h4 
##    -0.34    -0.22     0.53     1.42     0.94     0.35     1.91     0.61 
##  i48->h4  i49->h4  i50->h4  i51->h4  i52->h4  i53->h4  i54->h4  i55->h4 
##    -0.06    -0.29     1.02    -3.27     1.79     1.46     1.45     0.14 
##  i56->h4  i57->h4  i58->h4  i59->h4  i60->h4  i61->h4  i62->h4  i63->h4 
##     0.11     0.62    -0.56     0.06     0.53     1.03     2.54    -0.86 
##  i64->h4  i65->h4  i66->h4  i67->h4  i68->h4  i69->h4  i70->h4  i71->h4 
##     0.00     0.01    -0.04    -0.29     0.00     0.24     0.06     0.25 
##  i72->h4  i73->h4  i74->h4  i75->h4  i76->h4  i77->h4  i78->h4  i79->h4 
##     0.31     0.17     0.07     0.09    -0.17     0.31     0.66     0.20 
##  i80->h4  i81->h4  i82->h4  i83->h4  i84->h4  i85->h4  i86->h4  i87->h4 
##    -0.48     0.06    -0.01     0.05     0.03    -0.02    -0.13     0.26 
##  i88->h4  i89->h4  i90->h4  i91->h4  i92->h4  i93->h4  i94->h4  i95->h4 
##    -0.11    -0.09    -0.07    -0.25    -0.03     0.33     0.09     0.12 
##  i96->h4  i97->h4  i98->h4  i99->h4 i100->h4 i101->h4 i102->h4 i103->h4 
##     0.10    -0.39     0.20    -0.07     0.13     0.03    -0.12     0.29 
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4 
##     0.04     0.06     0.08    -0.11    -0.20 
##    b->h5   i1->h5   i2->h5   i3->h5   i4->h5   i5->h5   i6->h5   i7->h5 
##     0.64    -1.03    -0.01    -0.11     0.69     0.01    -0.22     0.12 
##   i8->h5   i9->h5  i10->h5  i11->h5  i12->h5  i13->h5  i14->h5  i15->h5 
##     0.15     0.02     0.00     0.00     0.27     0.12     0.10     0.02 
##  i16->h5  i17->h5  i18->h5  i19->h5  i20->h5  i21->h5  i22->h5  i23->h5 
##     0.06    -0.02     0.11     0.24     0.16    -0.17    -0.20     0.78 
##  i24->h5  i25->h5  i26->h5  i27->h5  i28->h5  i29->h5  i30->h5  i31->h5 
##    -0.17     0.00    -0.41    -0.24    -0.81     0.07     0.00    -0.29 
##  i32->h5  i33->h5  i34->h5  i35->h5  i36->h5  i37->h5  i38->h5  i39->h5 
##     0.12     0.67     0.13    -0.06     0.00    -0.51     0.01     0.02 
##  i40->h5  i41->h5  i42->h5  i43->h5  i44->h5  i45->h5  i46->h5  i47->h5 
##     0.36     0.20     0.32     0.26    -0.01     0.01     0.44     0.07 
##  i48->h5  i49->h5  i50->h5  i51->h5  i52->h5  i53->h5  i54->h5  i55->h5 
##    -0.80     0.13     0.15     1.28     0.11     0.33     0.63    -0.07 
##  i56->h5  i57->h5  i58->h5  i59->h5  i60->h5  i61->h5  i62->h5  i63->h5 
##    -1.64    -0.04     0.36     0.05     0.13     0.13    -1.24     1.87 
##  i64->h5  i65->h5  i66->h5  i67->h5  i68->h5  i69->h5  i70->h5  i71->h5 
##     0.00    -0.08     0.91    -0.27     0.00    -0.05     0.03    -0.14 
##  i72->h5  i73->h5  i74->h5  i75->h5  i76->h5  i77->h5  i78->h5  i79->h5 
##    -0.03    -0.01    -0.01     0.05    -0.03    -0.02     0.19    -0.05 
##  i80->h5  i81->h5  i82->h5  i83->h5  i84->h5  i85->h5  i86->h5  i87->h5 
##     0.02     0.01     0.00     0.00     0.01     0.00     0.01     0.02 
##  i88->h5  i89->h5  i90->h5  i91->h5  i92->h5  i93->h5  i94->h5  i95->h5 
##     0.05     0.03    -0.01     0.08     0.01     0.19     0.02     0.02 
##  i96->h5  i97->h5  i98->h5  i99->h5 i100->h5 i101->h5 i102->h5 i103->h5 
##     0.01     0.05    -0.13     0.06     0.05     0.04     0.09    -0.02 
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5 
##     0.02     0.02     0.32     0.02     0.00 
##   b->o  h1->o  h2->o  h3->o  h4->o  h5->o 
##  -0.89  -1.17  -3.12   0.56  -1.20  -0.84
# Variable-importance plot for the n4 neural-network fit, limited to 25
# features. The title previously misspelled "Assisted" as "Assited".
vip(Adult_TDA_PC_5.60.5_n4_NN1Fit0, num_features = 25L) +
  ggtitle("Adult_TDA_PCA_5.60.5_n4_NN1Fit TDA-Assisted NN")

# Score the test data with the n4 network that was fitted on training data.
pred0 <- predict(
  Adult_TDA_PC_5.60.5_n4_NN1Fit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the observed test labels to assess
# model performance on the test data.
ad_tda_pc_5.60.5_n4_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
print(ad_tda_pc_5.60.5_n4_nn1_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Display the n4 confusion matrix and its statistics.
print(ad_tda_pc_5.60.5_n4_nn1_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics (accuracy, kappa, accuracy bounds, p-values).
print(ad_tda_pc_5.60.5_n4_nn1_cf0[["overall"]])
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.7592138      0.0000000      0.7506071      0.7676657      0.7592138 
## AccuracyPValue  McnemarPValue 
##      0.5055358      0.0000000
# Keep the test-set accuracy (a named length-1 vector) for later comparison.
ad_tda_pc_5.60.5_n4_nn1_cf0_ov_acc <-
  ad_tda_pc_5.60.5_n4_nn1_cf0[["overall"]]["Accuracy"]
# Per-class statistics (sensitivity, specificity, precision, recall, F1, ...).
print(ad_tda_pc_5.60.5_n4_nn1_cf0[["byClass"]])
##          Sensitivity          Specificity       Pos Pred Value 
##            1.0000000            0.0000000            0.7592138 
##       Neg Pred Value            Precision               Recall 
##                  NaN            0.7592138            1.0000000 
##                   F1           Prevalence       Detection Rate 
##            0.8631285            0.7592138            0.7592138 
## Detection Prevalence    Balanced Accuracy 
##            1.0000000            0.5000000
# Keep precision, recall and F1 (entries 5 to 7 of the per-class statistics).
ad_tda_pc_5.60.5_n4_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n4_nn1_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers

### 3-fold diff

# Resampled accuracy in ad_nn1_fit_re minus that of the n4 TDA-assisted NN.
# A negative entry means the n4 fit scored higher in that row.
# NOTE(review): rows are subtracted by position, so this presumably relies on
# both resample tables listing the folds in the same order -- confirm.
diff_tda_pca_5.60.5_nn1_n4_3_fold <-
  ad_nn1_fit_re - ad_tda_pc_5.60.5_n4_nn1_fit_re
print(diff_tda_pca_5.60.5_nn1_n4_3_fold)
##      Accuracy
## 1 -0.13471213
## 2 -0.09726037
## 3 -0.14534419
## Bayesian Tests 3-fold diff

# Bayesian sign test on the per-fold accuracy differences.
# -0.01 and 0.01 are passed as the lower/upper bounds; the result reports
# probLeft, probRope and probRight.
bst_tda_pca_5.60.5_nn1.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_nn1_n4_3_fold),
  -0.01,
  0.01
)
print(bst_tda_pca_5.60.5_nn1.n4_3_fold)
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Ratio of the sign test's left probability to its right probability.
# The division yields Inf whenever probRight is 0 and probLeft is positive.
bst_tda_pca_5.60.5_nn1.n4_3_fold_odds.left <-
  bst_tda_pca_5.60.5_nn1.n4_3_fold[["probLeft"]] /
  bst_tda_pca_5.60.5_nn1.n4_3_fold[["probRight"]]
print(bst_tda_pca_5.60.5_nn1.n4_3_fold_odds.left)
## [1] Inf
# Bayesian signed-rank test on the per-fold accuracy differences, using the
# same -0.01 / 0.01 bounds; the result reports winLeft, winRope and winRight.
bsr_tda_pca_5.60.5_nn1.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nn1_n4_3_fold),
  -0.01,
  0.01
)
print(bsr_tda_pca_5.60.5_nn1.n4_3_fold)
## $winLeft
## [1] 0.9905333
## 
## $winRope
## [1] 0.009466667
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the per-fold accuracy differences; the result
# reports left, rope and right.
# NOTE(review): the second argument (0.1) is presumably the correlation
# between folds; for 3-fold CV the usual heuristic is 1/3 -- confirm against
# the function definition before relying on these probabilities.
bct_tda_pca_5.60.5_nn1.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_nn1_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_pca_5.60.5_nn1.n4_3_fold)
## $left
## [1] 0.9897471
## 
## $rope
## [1] 0.002735418
## 
## $right
## [1] 0.007517514
# ROPE plot of the per-fold differences over the interval [-0.01, 0.01].
plot(
  rope(diff_tda_pca_5.60.5_nn1_n4_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_pca_5.60.5_nn1.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nn1_n4_3_fold))
#bf_tda_pca_5.60.5_nn1.n4_3_fold

# One-sample t-test of the per-fold differences against a mean of 0.
print(t.test(x = as.matrix(diff_tda_pca_5.60.5_nn1_n4_3_fold)))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.60.5_nn1_n4_3_fold)
## t = -8.6248, df = 2, p-value = 0.01318
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.18851600 -0.06302845
## sample estimates:
##  mean of x 
## -0.1257722
### Test set diff
# Test-set accuracy in nn1_cf_ov_acc minus that of the n4 TDA-assisted NN.
# A positive value means the n4 fit scored lower on the test data.
diff_tda_pca_5.60.5_nn1.n4_test <-
  nn1_cf_ov_acc - ad_tda_pc_5.60.5_n4_nn1_cf0_ov_acc
print(diff_tda_pca_5.60.5_nn1.n4_test)
##   Accuracy 
## 0.04586405
## Bayesian Tests Test set diff

# Bayesian sign test on the test-set accuracy difference (a single value),
# with -0.01 and 0.01 passed as the lower/upper bounds.
bst_tda_pca_5.60.5_nn1.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_nn1.n4_test),
  -0.01,
  0.01
)
print(bst_tda_pca_5.60.5_nn1.n4_test)
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Ratio of the sign test's left probability to its right probability for the
# test-set difference. The division yields Inf (or NaN) if probRight is 0.
bst_tda_pca_5.60.5_nn1.n4_test_odds.left <-
  bst_tda_pca_5.60.5_nn1.n4_test[["probLeft"]] /
  bst_tda_pca_5.60.5_nn1.n4_test[["probRight"]]
print(bst_tda_pca_5.60.5_nn1.n4_test_odds.left)
## [1] 0
# Bayesian signed-rank test on the test-set accuracy difference (a single
# value), using the same -0.01 / 0.01 bounds.
bsr_tda_pca_5.60.5_nn1.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nn1.n4_test),
  -0.01,
  0.01
)
print(bsr_tda_pca_5.60.5_nn1.n4_test)
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1614667
## 
## $winRight
## [1] 0.8385333
# Bayesian correlated t-test on the test-set accuracy difference.
# NOTE(review): the input is a single value and left/rope/right all come back
# NA -- presumably because the spread of one observation is undefined;
# confirm against the function definition.
bct_tda_pca_5.60.5_nn1.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_nn1.n4_test),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_pca_5.60.5_nn1.n4_test)
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_nn1.n4_test)))

#BayesFactor
#bf_tda_pca_5.60.5_nn1.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nn1.n4_test)) #bf_tda_pca_5.60.5_nn1.n4_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_nn1.n4_test))


##Node5

# Neural Network 1: fit a caret "nnet" model on the n5 data, with
# as.factor(adult_df1) as the outcome and all other columns as predictors.
# Resampling comes from fitControl, candidate settings from nn1Grid, and the
# best candidate is selected by accuracy.
# TRUE is spelled out because the shorthand T can be reassigned.
# NOTE(review): `Importance` is forwarded through `...`; it does not look like
# an nnet argument (possibly carried over from the random-forest calls) --
# confirm it has any effect here.
Adult_TDA_PC_5.60.5_n5_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n5.vec,
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 5744.319145 
## iter  10 value 529.818820
## iter  20 value 510.499806
## iter  30 value 469.733949
## iter  40 value 466.230785
## iter  50 value 466.078871
## iter  60 value 465.978826
## iter  70 value 465.963818
## final  value 465.963073 
## converged
## # weights:  331
## initial  value 5446.402384 
## iter  10 value 512.538285
## iter  20 value 509.511745
## iter  30 value 509.508676
## final  value 509.508612 
## converged
## # weights:  551
## initial  value 4789.671418 
## iter  10 value 514.692218
## iter  20 value 465.329815
## iter  30 value 463.718995
## iter  40 value 463.681077
## iter  50 value 463.588836
## final  value 463.588792 
## converged
## # weights:  771
## initial  value 9727.499859 
## iter  10 value 624.092746
## iter  20 value 474.510850
## iter  30 value 465.532539
## iter  40 value 456.566003
## iter  50 value 451.021045
## iter  60 value 449.962348
## iter  70 value 447.862393
## iter  80 value 447.747284
## iter  90 value 447.581743
## iter 100 value 447.113692
## final  value 447.113692 
## stopped after 100 iterations
## # weights:  221
## initial  value 12546.602891 
## iter  10 value 528.682879
## iter  20 value 517.466258
## iter  30 value 513.146511
## iter  40 value 495.850633
## iter  50 value 470.665466
## iter  60 value 469.527518
## iter  70 value 469.436900
## iter  80 value 468.826019
## iter  90 value 460.216695
## iter 100 value 453.059896
## final  value 453.059896 
## stopped after 100 iterations
## # weights:  331
## initial  value 4568.715821 
## iter  10 value 517.688694
## iter  20 value 511.885457
## iter  30 value 500.469364
## iter  40 value 499.921136
## iter  50 value 477.866822
## iter  60 value 475.126794
## iter  70 value 463.036067
## iter  80 value 460.677976
## iter  90 value 459.306339
## iter 100 value 459.206155
## final  value 459.206155 
## stopped after 100 iterations
## # weights:  551
## initial  value 9728.463299 
## iter  10 value 527.486788
## iter  20 value 512.842463
## iter  30 value 511.818931
## iter  40 value 511.482939
## iter  50 value 490.865844
## iter  60 value 476.388272
## iter  70 value 474.534711
## iter  80 value 472.193579
## iter  90 value 471.898340
## iter 100 value 471.897463
## final  value 471.897463 
## stopped after 100 iterations
## # weights:  771
## initial  value 4833.386935 
## iter  10 value 510.670986
## iter  20 value 510.085940
## iter  30 value 509.835623
## iter  40 value 486.201501
## iter  50 value 464.890123
## iter  60 value 463.378948
## iter  70 value 462.344129
## iter  80 value 460.921404
## iter  90 value 459.990182
## iter 100 value 456.672611
## final  value 456.672611 
## stopped after 100 iterations
## # weights:  221
## initial  value 8236.221618 
## iter  10 value 564.735986
## iter  20 value 556.639252
## iter  30 value 526.113151
## iter  40 value 501.567774
## iter  50 value 483.992562
## iter  60 value 480.212485
## iter  70 value 477.996847
## iter  80 value 477.539900
## iter  90 value 475.653586
## iter 100 value 467.254597
## final  value 467.254597 
## stopped after 100 iterations
## # weights:  331
## initial  value 5769.826818 
## iter  10 value 518.481365
## iter  20 value 512.466001
## iter  30 value 512.404822
## iter  40 value 485.976366
## iter  50 value 468.085955
## iter  60 value 465.700517
## iter  70 value 464.140859
## iter  80 value 461.571077
## iter  90 value 428.981117
## iter 100 value 408.692582
## final  value 408.692582 
## stopped after 100 iterations
## # weights:  551
## initial  value 8711.121441 
## iter  10 value 543.473074
## iter  20 value 513.706485
## iter  30 value 513.016507
## iter  40 value 511.106873
## iter  50 value 510.605340
## iter  60 value 479.964587
## iter  70 value 472.936586
## iter  80 value 472.426800
## iter  90 value 472.423073
## final  value 472.423062 
## converged
## # weights:  771
## initial  value 4900.994750 
## iter  10 value 551.882979
## iter  20 value 504.207314
## iter  30 value 501.719517
## iter  40 value 501.500664
## iter  50 value 476.920411
## iter  60 value 467.355233
## iter  70 value 459.482623
## iter  80 value 434.398228
## iter  90 value 409.534361
## iter 100 value 407.631610
## final  value 407.631610 
## stopped after 100 iterations
## # weights:  221
## initial  value 6657.709644 
## iter  10 value 512.529786
## iter  20 value 498.991152
## iter  30 value 463.713605
## iter  40 value 457.987267
## iter  50 value 456.634079
## iter  60 value 454.072880
## iter  70 value 451.917684
## iter  80 value 437.771128
## iter  90 value 436.433228
## iter 100 value 436.030981
## final  value 436.030981 
## stopped after 100 iterations
## # weights:  331
## initial  value 8564.650082 
## iter  10 value 513.690398
## iter  20 value 512.244659
## iter  30 value 460.932181
## iter  40 value 459.216797
## iter  50 value 457.358620
## iter  60 value 457.176808
## iter  70 value 456.493614
## final  value 456.492582 
## converged
## # weights:  551
## initial  value 6988.321102 
## iter  10 value 508.353802
## iter  20 value 506.200610
## iter  30 value 485.825824
## iter  40 value 466.495919
## iter  50 value 453.136293
## iter  60 value 452.268900
## iter  70 value 449.438731
## iter  80 value 444.344866
## iter  90 value 436.986419
## iter 100 value 433.449859
## final  value 433.449859 
## stopped after 100 iterations
## # weights:  771
## initial  value 12957.124366 
## iter  10 value 554.346890
## iter  20 value 471.265413
## iter  30 value 460.080303
## iter  40 value 457.724202
## iter  50 value 454.600215
## iter  60 value 445.195934
## iter  70 value 429.773230
## iter  80 value 421.737512
## iter  90 value 414.999209
## iter 100 value 406.581763
## final  value 406.581763 
## stopped after 100 iterations
## # weights:  221
## initial  value 8519.544887 
## iter  10 value 529.079982
## iter  20 value 465.261124
## iter  30 value 459.497913
## iter  40 value 451.944860
## iter  50 value 448.090366
## iter  60 value 444.752232
## iter  70 value 441.347057
## iter  80 value 440.802791
## iter  90 value 438.719419
## iter 100 value 432.522046
## final  value 432.522046 
## stopped after 100 iterations
## # weights:  331
## initial  value 5685.335268 
## iter  10 value 508.725814
## iter  20 value 507.151162
## iter  30 value 477.534391
## iter  40 value 465.922383
## iter  50 value 461.420369
## iter  60 value 461.268208
## iter  70 value 461.253914
## iter  80 value 458.118444
## iter  90 value 448.023231
## iter 100 value 447.235003
## final  value 447.235003 
## stopped after 100 iterations
## # weights:  551
## initial  value 13774.946913 
## iter  10 value 509.166427
## iter  20 value 481.758503
## iter  30 value 467.808807
## iter  40 value 446.474395
## iter  50 value 445.044142
## iter  60 value 444.253630
## iter  70 value 441.508604
## iter  80 value 440.471817
## iter  90 value 439.643231
## iter 100 value 439.528749
## final  value 439.528749 
## stopped after 100 iterations
## # weights:  771
## initial  value 19235.290237 
## iter  10 value 517.241428
## iter  20 value 506.261843
## iter  30 value 492.547737
## iter  40 value 465.143816
## iter  50 value 462.960671
## iter  60 value 462.005302
## iter  70 value 458.343491
## iter  80 value 449.892479
## iter  90 value 435.254656
## iter 100 value 423.249798
## final  value 423.249798 
## stopped after 100 iterations
## # weights:  221
## initial  value 7469.413957 
## iter  10 value 514.319064
## iter  20 value 510.278415
## iter  30 value 504.194094
## iter  40 value 487.120834
## iter  50 value 472.757394
## iter  60 value 465.497980
## iter  70 value 453.949706
## iter  80 value 452.679734
## iter  90 value 452.638890
## iter 100 value 452.600877
## final  value 452.600877 
## stopped after 100 iterations
## # weights:  331
## initial  value 11114.887678 
## iter  10 value 538.810521
## iter  20 value 530.294489
## iter  30 value 473.869910
## iter  40 value 433.605750
## iter  50 value 394.665715
## iter  60 value 384.265163
## iter  70 value 383.223445
## iter  80 value 383.125525
## iter  90 value 382.984377
## iter 100 value 382.976870
## final  value 382.976870 
## stopped after 100 iterations
## # weights:  551
## initial  value 4887.726055 
## iter  10 value 555.239197
## iter  20 value 492.355948
## iter  30 value 457.569491
## iter  40 value 446.150375
## iter  50 value 438.965633
## iter  60 value 424.560087
## iter  70 value 416.121554
## iter  80 value 415.427769
## iter  90 value 403.779804
## iter 100 value 395.538274
## final  value 395.538274 
## stopped after 100 iterations
## # weights:  771
## initial  value 6769.310794 
## iter  10 value 770.992757
## iter  20 value 508.084111
## iter  30 value 495.589740
## iter  40 value 476.178686
## iter  50 value 469.189048
## iter  60 value 464.460221
## iter  70 value 455.699350
## iter  80 value 447.920111
## iter  90 value 445.223690
## iter 100 value 444.881900
## final  value 444.881900 
## stopped after 100 iterations
## # weights:  221
## initial  value 16326.122167 
## iter  10 value 635.278690
## iter  20 value 514.715316
## iter  30 value 504.441910
## iter  40 value 489.223384
## iter  50 value 478.296733
## iter  60 value 474.820788
## iter  70 value 472.758898
## iter  80 value 471.248799
## iter  90 value 470.951009
## iter 100 value 467.711320
## final  value 467.711320 
## stopped after 100 iterations
## # weights:  331
## initial  value 9022.382860 
## iter  10 value 515.020284
## iter  20 value 494.836856
## iter  30 value 445.759777
## iter  40 value 393.672025
## iter  50 value 386.199551
## final  value 385.698846 
## converged
## # weights:  551
## initial  value 14509.334721 
## iter  10 value 849.163485
## iter  20 value 513.140193
## iter  30 value 510.369675
## iter  40 value 510.270878
## iter  50 value 509.137037
## iter  60 value 503.876267
## iter  70 value 454.831760
## iter  80 value 425.115300
## iter  90 value 417.409083
## iter 100 value 410.733317
## final  value 410.733317 
## stopped after 100 iterations
## # weights:  771
## initial  value 11281.506291 
## iter  10 value 697.647766
## iter  20 value 509.367401
## iter  30 value 480.146169
## iter  40 value 468.646012
## iter  50 value 439.868736
## iter  60 value 438.492237
## iter  70 value 437.049750
## iter  80 value 409.503280
## iter  90 value 390.223824
## iter 100 value 384.410660
## final  value 384.410660 
## stopped after 100 iterations
## # weights:  221
## initial  value 6384.261533 
## iter  10 value 513.571845
## iter  20 value 513.568031
## iter  30 value 476.441502
## iter  40 value 471.598170
## iter  50 value 458.590627
## iter  60 value 457.081374
## iter  70 value 456.967952
## iter  80 value 456.657810
## iter  90 value 456.054236
## iter 100 value 455.823674
## final  value 455.823674 
## stopped after 100 iterations
## # weights:  331
## initial  value 5425.588634 
## iter  10 value 525.363341
## iter  20 value 519.428935
## iter  30 value 485.255351
## iter  40 value 476.747230
## iter  50 value 464.542410
## iter  60 value 461.960984
## iter  70 value 461.007174
## iter  80 value 456.503204
## iter  90 value 453.699200
## iter 100 value 451.205291
## final  value 451.205291 
## stopped after 100 iterations
## # weights:  551
## initial  value 8982.507571 
## iter  10 value 491.790085
## iter  20 value 468.173252
## iter  30 value 466.227226
## iter  40 value 466.159977
## iter  50 value 463.754555
## iter  60 value 462.106878
## iter  70 value 461.435056
## iter  80 value 461.382917
## iter  90 value 461.361078
## iter 100 value 461.102746
## final  value 461.102746 
## stopped after 100 iterations
## # weights:  771
## initial  value 8910.233964 
## iter  10 value 517.644002
## iter  20 value 474.877542
## iter  30 value 471.164247
## iter  40 value 459.982799
## iter  50 value 454.325169
## iter  60 value 448.410252
## iter  70 value 445.455713
## iter  80 value 445.294998
## iter  90 value 445.233850
## iter 100 value 444.772027
## final  value 444.772027 
## stopped after 100 iterations
## # weights:  221
## initial  value 10904.936002 
## iter  10 value 540.880793
## iter  20 value 513.178322
## iter  30 value 472.736623
## iter  40 value 470.711787
## iter  50 value 470.348302
## iter  60 value 469.326253
## iter  70 value 468.484118
## iter  80 value 465.988809
## iter  90 value 449.339196
## iter 100 value 446.457557
## final  value 446.457557 
## stopped after 100 iterations
## # weights:  331
## initial  value 8805.300389 
## iter  10 value 539.971702
## iter  20 value 506.531270
## iter  30 value 480.788798
## iter  40 value 465.013440
## iter  50 value 457.788708
## iter  60 value 457.641176
## iter  70 value 457.438871
## iter  80 value 456.683416
## iter  90 value 451.680347
## iter 100 value 447.896511
## final  value 447.896511 
## stopped after 100 iterations
## # weights:  551
## initial  value 16741.876046 
## iter  10 value 662.369923
## iter  20 value 512.576860
## iter  30 value 511.838965
## iter  40 value 480.001930
## iter  50 value 469.892945
## iter  60 value 464.530940
## iter  70 value 459.221985
## iter  80 value 458.711448
## iter  90 value 457.370387
## iter 100 value 452.080807
## final  value 452.080807 
## stopped after 100 iterations
## # weights:  771
## initial  value 14710.520363 
## iter  10 value 519.102588
## iter  20 value 488.600552
## iter  30 value 464.092053
## iter  40 value 461.703874
## iter  50 value 458.300179
## iter  60 value 453.885813
## iter  70 value 452.148766
## iter  80 value 448.499457
## iter  90 value 447.506977
## iter 100 value 443.215628
## final  value 443.215628 
## stopped after 100 iterations
## # weights:  331
## initial  value 8081.991600 
## iter  10 value 719.850195
## iter  20 value 713.105264
## iter  30 value 694.041719
## iter  40 value 687.118684
## iter  50 value 660.927249
## iter  60 value 626.971663
## iter  70 value 603.953913
## iter  80 value 583.131858
## iter  90 value 578.753232
## iter 100 value 578.715980
## final  value 578.715980 
## stopped after 100 iterations
# Show the resampling results across the tuning grid and the selected model.
print(Adult_TDA_PC_5.60.5_n5_NN1Fit0)
## Neural Network 
## 
## 16508 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 11006, 11004, 11006 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa        
##   2     0.3    0.9924884   0.0980688588
##   2     0.5    0.9929125   0.2418254442
##   2     0.7    0.9924280   0.0825568948
##   3     0.3    0.9920645  -0.0001184563
##   3     0.5    0.9927309   0.1758420031
##   3     0.7    0.9930337   0.2636034862
##   5     0.3    0.9924884   0.0980688588
##   5     0.5    0.9924884   0.0980688588
##   5     0.7    0.9923673   0.0944195576
##   7     0.3    0.9925491   0.1494826371
##   7     0.5    0.9926703   0.1655346275
##   7     0.7    0.9921250   0.0000000000
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 3 and decay = 0.7.
# Per-fold accuracy and kappa of the selected n5 model.
print(Adult_TDA_PC_5.60.5_n5_NN1Fit0[["resample"]])
##    Accuracy     Kappa Resample
## 1 0.9929117 0.2025554    Fold3
## 2 0.9930959 0.2942066    Fold2
## 3 0.9930934 0.2940485    Fold1
# Keep the per-fold accuracies as a one-column data frame for later comparison.
ad_tda_pc_5.60.5_n5_nn1_fit_re <-
  Adult_TDA_PC_5.60.5_n5_NN1Fit0[["resample"]]["Accuracy"]

# Print the fitted network's architecture, options and weights.
print(summary(Adult_TDA_PC_5.60.5_n5_NN1Fit0))
## a 108-3-1 network with 331 weights
## options were - entropy fitting  decay=0.7
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##     1.34    -0.03     0.12    -0.21     0.04     0.01    -0.29     0.49 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##     0.92     0.23     0.03     0.00     0.91    -0.30     0.06     0.41 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##    -0.23     0.77     0.32     0.08    -0.13    -0.45     0.05     0.05 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##    -0.13     0.18     0.14    -0.41     0.06     0.08    -1.20     0.45 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.58     0.38     0.77     0.28     0.13     0.07     0.00    -0.26 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##    -0.29     0.39     0.72     0.61     0.00     0.73     0.43    -1.12 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##    -0.18    -0.37     0.50     0.02     0.09     0.69     0.47     0.20 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##    -0.13     0.45     0.02     0.33    -0.09     0.64    -0.52     1.86 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.00     0.00    -0.03     0.64     0.06     0.28     0.16     0.19 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##     0.33     0.27     0.07     0.24    -0.31     0.09     0.40     0.03 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.01     0.19     0.00     0.04     0.06     0.05    -0.39     0.05 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##    -0.44     0.20    -0.23    -0.61    -0.51     0.19     0.10     0.12 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.11    -0.36     0.14    -0.78     0.41     0.04     0.20     0.11 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.12     0.13     0.24    -0.31     0.01 
##    b->h3   i1->h3   i2->h3   i3->h3   i4->h3   i5->h3   i6->h3   i7->h3 
##     0.05     0.45     0.00     0.00     0.00     0.00     0.04     0.00 
##   i8->h3   i9->h3  i10->h3  i11->h3  i12->h3  i13->h3  i14->h3  i15->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.01     0.00 
##  i16->h3  i17->h3  i18->h3  i19->h3  i20->h3  i21->h3  i22->h3  i23->h3 
##     0.00     0.00     0.00    -0.10     0.00     0.02     0.00    -0.07 
##  i24->h3  i25->h3  i26->h3  i27->h3  i28->h3  i29->h3  i30->h3  i31->h3 
##     0.00     0.00     0.00     0.20     0.42     0.20     0.00     0.01 
##  i32->h3  i33->h3  i34->h3  i35->h3  i36->h3  i37->h3  i38->h3  i39->h3 
##     0.00    -0.19     0.04     0.00     0.00    -0.03     0.00     0.01 
##  i40->h3  i41->h3  i42->h3  i43->h3  i44->h3  i45->h3  i46->h3  i47->h3 
##     0.02     0.00     0.04     0.00     0.01     0.00    -0.02     0.00 
##  i48->h3  i49->h3  i50->h3  i51->h3  i52->h3  i53->h3  i54->h3  i55->h3 
##     0.02     0.00     0.01     0.00     0.12     0.00     0.02    -0.09 
##  i56->h3  i57->h3  i58->h3  i59->h3  i60->h3  i61->h3  i62->h3  i63->h3 
##     0.00     0.02    -0.07    -0.01     0.00     0.11     0.02     0.03 
##  i64->h3  i65->h3  i66->h3  i67->h3  i68->h3  i69->h3  i70->h3  i71->h3 
##     0.00     0.05     0.13     0.00     0.00     0.00     0.00     0.00 
##  i72->h3  i73->h3  i74->h3  i75->h3  i76->h3  i77->h3  i78->h3  i79->h3 
##     0.00     0.00     0.00     0.00     0.02     0.00     0.00     0.00 
##  i80->h3  i81->h3  i82->h3  i83->h3  i84->h3  i85->h3  i86->h3  i87->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h3  i89->h3  i90->h3  i91->h3  i92->h3  i93->h3  i94->h3  i95->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h3  i97->h3  i98->h3  i99->h3 i100->h3 i101->h3 i102->h3 i103->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3 
##     0.00     0.00     0.03     0.00     0.00 
##  b->o h1->o h2->o h3->o 
##  0.54  0.28 -5.55 -2.63
# Variable-importance plot for the TDA-assisted neural network fit. The number
# of features to display is passed by name (it was an unnamed positional 25),
# and the title typo "Assited" is corrected.
vip(Adult_TDA_PC_5.60.5_n5_NN1Fit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.60.5_n5_NN1Fit TDA-Assisted NN")

# Score the test data with the model that was fitted on the training data.
pred0 <- predict(
  Adult_TDA_PC_5.60.5_n5_NN1Fit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the observed test labels, then show
# the confusion matrix together with its summary statistics.
ad_tda_pc_5.60.5_n5_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
print(ad_tda_pc_5.60.5_n5_nn1_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7414  2157
##      >50K       2   195
##                                           
##                Accuracy : 0.779           
##                  95% CI : (0.7706, 0.7872)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 2.137e-06       
##                                           
##                   Kappa : 0.1203          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.99973         
##             Specificity : 0.08291         
##          Pos Pred Value : 0.77463         
##          Neg Pred Value : 0.98985         
##              Prevalence : 0.75921         
##          Detection Rate : 0.75901         
##    Detection Prevalence : 0.97983         
##       Balanced Accuracy : 0.54132         
##                                           
##        'Positive' Class :  <=50K          
## 
# Display the same test-set confusion matrix object once more.
print(ad_tda_pc_5.60.5_n5_nn1_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7414  2157
##      >50K       2   195
##                                           
##                Accuracy : 0.779           
##                  95% CI : (0.7706, 0.7872)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 2.137e-06       
##                                           
##                   Kappa : 0.1203          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.99973         
##             Specificity : 0.08291         
##          Pos Pred Value : 0.77463         
##          Neg Pred Value : 0.98985         
##              Prevalence : 0.75921         
##          Detection Rate : 0.75901         
##    Detection Prevalence : 0.97983         
##       Balanced Accuracy : 0.54132         
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics stored on the confusion matrix object (accuracy, kappa,
# accuracy interval bounds and the associated p-values).
print(ad_tda_pc_5.60.5_n5_nn1_cf0$overall)
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.789722e-01   1.202586e-01   7.706098e-01   7.871679e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   2.136509e-06   0.000000e+00
# Keep the overall test-set accuracy for the test-set comparison further down.
# It is selected by name, so the extraction no longer depends on "Accuracy"
# being the first element of `overall`.
ad_tda_pc_5.60.5_n5_nn1_cf0_ov_acc <-
  ad_tda_pc_5.60.5_n5_nn1_cf0$overall["Accuracy"]

# Per-class statistics; these are reported for the 'Positive' class of the
# confusion matrix.
ad_tda_pc_5.60.5_n5_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##           0.99973031           0.08290816           0.77463170 
##       Neg Pred Value            Precision               Recall 
##           0.98984772           0.77463170           0.99973031 
##                   F1           Prevalence       Detection Rate 
##           0.87290281           0.75921376           0.75900901 
## Detection Prevalence    Balanced Accuracy 
##           0.97983210           0.54131924
# Keep precision, recall and F1. They are selected by name instead of the
# positional range 5:7, so a change in the layout of `byClass` cannot silently
# pick up different statistics.
ad_tda_pc_5.60.5_n5_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n5_nn1_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers

### 3-fold diff

# Row-wise difference of the resampled accuracies: ad_nn1_fit_re minus the
# resamples of the TDA-assisted fit (negative rows mean the latter is higher).
diff_tda_pca_5.60.5_nn1_n5_3_fold <-
  ad_nn1_fit_re - ad_tda_pc_5.60.5_n5_nn1_fit_re
print(diff_tda_pca_5.60.5_nn1_n5_3_fold)
##     Accuracy
## 1 -0.1849612
## 2 -0.1430038
## 3 -0.1916983
## Bayesian Tests 3-fold diff

# Bayesian sign test on the 3-fold differences. The trailing -0.01 / 0.01
# arguments match the +/-0.01 interval used for the ROPE plot of this section.
bst_tda_pca_5.60.5_nn1.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_nn1_n5_3_fold),
  -0.01,
  0.01
)
print(bst_tda_pca_5.60.5_nn1.n5_3_fold)
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds of "left" against "right" from the Bayesian sign test, i.e.
# probLeft / probRight (this evaluates to Inf when probRight is 0).
bst_tda_pca_5.60.5_nn1.n5_3_fold_odds.left <-
  bst_tda_pca_5.60.5_nn1.n5_3_fold[["probLeft"]] /
  bst_tda_pca_5.60.5_nn1.n5_3_fold[["probRight"]]
print(bst_tda_pca_5.60.5_nn1.n5_3_fold_odds.left)
## [1] Inf
# Bayesian signed-rank test on the same 3-fold differences, called with the
# same -0.01 / 0.01 pair as the sign test.
bsr_tda_pca_5.60.5_nn1.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nn1_n5_3_fold),
  -0.01,
  0.01
)
print(bsr_tda_pca_5.60.5_nn1.n5_3_fold)
## $winLeft
## [1] 0.9904667
## 
## $winRope
## [1] 0.009533333
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the 3-fold differences.
# NOTE(review): if the second argument (0.1) is the fold correlation rho,
# the usual 1/k choice would be 1/3 for 3-fold resampling; 0.1 corresponds
# to 10 folds. Confirm against the definition of correlatedBayesianTtest.
bct_tda_pca_5.60.5_nn1.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_nn1_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_pca_5.60.5_nn1.n5_3_fold)
## $left
## [1] 0.9942923
## 
## $rope
## [1] 0.001162052
## 
## $right
## [1] 0.00454561
# ROPE plot of the 3-fold differences over the interval [-0.01, 0.01].
plot(
  rope(diff_tda_pca_5.60.5_nn1_n5_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor t-test (left disabled).
#bf_tda_pca_5.60.5_nn1.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nn1_n5_3_fold))
#bf_tda_pca_5.60.5_nn1.n5_3_fold

# Classical one-sample t-test of the same differences against a mean of 0.
t.test(x = as.matrix(diff_tda_pca_5.60.5_nn1_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.60.5_nn1_n5_3_fold)
## t = -11.371, df = 2, p-value = 0.007645
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.2387647 -0.1076775
## sample estimates:
##  mean of x 
## -0.1732211
### Test set diff
# Difference of the two stored test-set accuracies: nn1_cf_ov_acc minus the
# accuracy of the TDA-assisted fit (a single named value).
diff_tda_pca_5.60.5_nn1.n5_test <-
  nn1_cf_ov_acc - ad_tda_pc_5.60.5_n5_nn1_cf0_ov_acc
print(diff_tda_pca_5.60.5_nn1.n5_test)
##   Accuracy 
## 0.02610565
## Bayesian Tests Test set diff

# Bayesian sign test on the single test-set accuracy difference, called with
# the same -0.01 / 0.01 pair as the 3-fold tests.
bst_tda_pca_5.60.5_nn1.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_nn1.n5_test),
  -0.01,
  0.01
)
print(bst_tda_pca_5.60.5_nn1.n5_test)
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" against "right" for the test-set sign test
# (probLeft / probRight).
bst_tda_pca_5.60.5_nn1.n5_test_odds.left <-
  bst_tda_pca_5.60.5_nn1.n5_test[["probLeft"]] /
  bst_tda_pca_5.60.5_nn1.n5_test[["probRight"]]
print(bst_tda_pca_5.60.5_nn1.n5_test_odds.left)
## [1] 0
# Bayesian signed-rank test on the single test-set accuracy difference.
bsr_tda_pca_5.60.5_nn1.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nn1.n5_test),
  -0.01,
  0.01
)
print(bsr_tda_pca_5.60.5_nn1.n5_test)
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1602667
## 
## $winRight
## [1] 0.8397333
# Bayesian correlated t-test on the test-set difference.
# NOTE(review): the input here is a single value and the printed result is NA
# for left, rope and right; confirm whether this call should be kept.
bct_tda_pca_5.60.5_nn1.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_nn1.n5_test),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_pca_5.60.5_nn1.n5_test)
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_nn1.n5_test, c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.60.5_nn1.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nn1.n5_test)) #bf_tda_pca_5.60.5_nn1.n5_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_nn1.n5_test))


##With TDA KDE filter 5 intervals, 50% overlap, 5 bins 
##Node1

# Neural Network 1
# Tune an nnet classifier for adult_df1 on the KDE-filter node-1 data using
# caret::train. The resampling scheme (fitControl) and the tuning grid
# (nn1Grid) are defined elsewhere in the script; the model is selected on
# Accuracy.
# `Importance` is forwarded through train()'s `...`; it is now spelled TRUE
# because T is an ordinary variable that can be reassigned.
Adult_TDA_KDE_5.60.5_n1_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n1.vec,
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 8465.559983 
## iter  10 value 5829.002301
## iter  20 value 5319.297405
## iter  30 value 5299.355528
## iter  40 value 5295.398757
## iter  50 value 5181.776784
## iter  60 value 5170.895711
## iter  70 value 5151.295770
## iter  80 value 5136.449119
## iter  90 value 5111.035887
## iter 100 value 5089.800014
## final  value 5089.800014 
## stopped after 100 iterations
## # weights:  331
## initial  value 6214.957437 
## iter  10 value 5689.948347
## iter  20 value 5258.030285
## iter  30 value 5211.248445
## iter  40 value 5187.593001
## iter  50 value 5173.187762
## iter  60 value 5147.515584
## final  value 5139.704010 
## converged
## # weights:  551
## initial  value 5947.428872 
## iter  10 value 5355.604003
## iter  20 value 5312.712076
## iter  30 value 5285.882495
## iter  40 value 5267.397130
## iter  50 value 5259.116009
## iter  60 value 5208.107280
## iter  70 value 5094.149356
## iter  80 value 5032.122756
## iter  90 value 5012.069983
## iter 100 value 4990.421265
## final  value 4990.421265 
## stopped after 100 iterations
## # weights:  771
## initial  value 16651.045269 
## iter  10 value 5745.875122
## iter  20 value 5368.905534
## iter  30 value 5321.965699
## iter  40 value 5230.397376
## iter  50 value 5164.039072
## iter  60 value 5032.891004
## iter  70 value 4710.014887
## iter  80 value 4454.066398
## iter  90 value 4141.225673
## iter 100 value 3742.723359
## final  value 3742.723359 
## stopped after 100 iterations
## # weights:  221
## initial  value 5859.175865 
## iter  10 value 5465.817495
## iter  20 value 5377.056644
## iter  30 value 5359.127853
## iter  40 value 5282.128166
## iter  50 value 5208.545629
## iter  60 value 5173.000458
## iter  70 value 5136.866066
## iter  80 value 5000.719077
## iter  90 value 4454.580725
## iter 100 value 4142.264025
## final  value 4142.264025 
## stopped after 100 iterations
## # weights:  331
## initial  value 7539.309266 
## iter  10 value 5396.202195
## iter  20 value 5312.886622
## iter  30 value 5295.053578
## iter  40 value 5275.743780
## iter  50 value 5230.714335
## iter  60 value 5122.769181
## iter  70 value 4921.770492
## iter  80 value 3967.168035
## iter  90 value 3458.297058
## iter 100 value 3385.331984
## final  value 3385.331984 
## stopped after 100 iterations
## # weights:  551
## initial  value 8122.214841 
## iter  10 value 5851.600916
## iter  20 value 5828.393775
## iter  30 value 5409.889867
## iter  40 value 5405.794620
## iter  50 value 5333.972305
## iter  60 value 5306.831200
## iter  70 value 5301.919769
## iter  80 value 5177.319839
## iter  90 value 5126.153685
## iter 100 value 4879.503329
## final  value 4879.503329 
## stopped after 100 iterations
## # weights:  771
## initial  value 6351.236449 
## iter  10 value 5593.298971
## iter  20 value 5330.497058
## iter  30 value 5294.346991
## iter  40 value 5194.513233
## iter  50 value 5102.311211
## iter  60 value 5064.063684
## iter  70 value 5042.833622
## iter  80 value 5029.238263
## iter  90 value 5023.262702
## iter 100 value 5001.453952
## final  value 5001.453952 
## stopped after 100 iterations
## # weights:  221
## initial  value 6376.584910 
## iter  10 value 5810.279643
## iter  20 value 5775.587965
## iter  30 value 5771.514624
## iter  40 value 5771.366477
## iter  50 value 5536.989033
## iter  60 value 5387.301783
## iter  70 value 5300.523812
## iter  80 value 5198.360285
## iter  90 value 5187.062093
## iter 100 value 5153.847750
## final  value 5153.847750 
## stopped after 100 iterations
## # weights:  331
## initial  value 8033.482786 
## iter  10 value 5785.841349
## iter  20 value 5349.059316
## iter  30 value 5309.636990
## iter  40 value 5299.000057
## iter  50 value 5284.752590
## iter  60 value 5189.489794
## iter  70 value 5105.963999
## iter  80 value 5037.046508
## iter  90 value 4979.711987
## iter 100 value 4921.753245
## final  value 4921.753245 
## stopped after 100 iterations
## # weights:  551
## initial  value 6763.700353 
## iter  10 value 5710.858596
## iter  20 value 5298.288384
## iter  30 value 5232.925610
## iter  40 value 5198.135999
## iter  50 value 5165.606388
## iter  60 value 5042.814264
## iter  70 value 5008.686998
## iter  80 value 4942.865133
## iter  90 value 4874.973119
## iter 100 value 4465.618447
## final  value 4465.618447 
## stopped after 100 iterations
## # weights:  771
## initial  value 7210.017025 
## iter  10 value 5483.131626
## iter  20 value 5245.878143
## iter  30 value 5184.126733
## iter  40 value 5159.279176
## iter  50 value 5131.830063
## iter  60 value 5067.936722
## iter  70 value 4996.867314
## iter  80 value 4826.777695
## iter  90 value 4300.679144
## iter 100 value 3849.874966
## final  value 3849.874966 
## stopped after 100 iterations
## # weights:  221
## initial  value 7913.250856 
## iter  10 value 5407.245011
## iter  20 value 5362.955726
## iter  30 value 5321.412614
## iter  40 value 5314.118088
## iter  50 value 5301.367529
## iter  60 value 5213.465527
## iter  70 value 5125.657642
## iter  80 value 4961.743279
## iter  90 value 4836.986301
## iter 100 value 4616.470175
## final  value 4616.470175 
## stopped after 100 iterations
## # weights:  331
## initial  value 5845.500759 
## iter  10 value 5720.087488
## iter  20 value 5712.836993
## iter  30 value 5712.799313
## iter  30 value 5712.799287
## iter  40 value 5323.056680
## iter  50 value 5302.826412
## iter  60 value 5272.806736
## iter  70 value 5200.130963
## iter  80 value 5165.669793
## iter  90 value 5052.274224
## iter 100 value 4902.870031
## final  value 4902.870031 
## stopped after 100 iterations
## # weights:  551
## initial  value 6631.141780 
## iter  10 value 5641.641203
## iter  20 value 5421.443229
## iter  30 value 5171.342610
## iter  40 value 5101.740330
## iter  50 value 5082.083842
## iter  60 value 5052.634424
## iter  70 value 5011.580325
## iter  80 value 4914.761727
## iter  90 value 4593.551084
## iter 100 value 4128.260952
## final  value 4128.260952 
## stopped after 100 iterations
## # weights:  771
## initial  value 6077.353894 
## iter  10 value 5565.578720
## iter  20 value 5520.167414
## iter  30 value 5333.192735
## iter  40 value 5330.710470
## iter  50 value 5329.535528
## iter  60 value 5317.961896
## iter  70 value 5196.119309
## iter  80 value 5159.010014
## iter  90 value 5143.013448
## iter 100 value 5138.037807
## final  value 5138.037807 
## stopped after 100 iterations
## # weights:  221
## initial  value 7378.949145 
## iter  10 value 5581.619065
## iter  20 value 5377.876496
## iter  30 value 5373.698473
## iter  40 value 5371.897869
## iter  50 value 5368.368677
## iter  60 value 5189.185265
## iter  70 value 5080.940308
## iter  80 value 5017.783341
## iter  90 value 4969.579813
## iter 100 value 4683.050505
## final  value 4683.050505 
## stopped after 100 iterations
## # weights:  331
## initial  value 6941.792389 
## iter  10 value 5826.281614
## iter  20 value 5670.922667
## iter  30 value 5386.431581
## iter  40 value 5326.847831
## iter  50 value 5224.151841
## iter  60 value 5148.944794
## iter  70 value 5120.950540
## iter  80 value 5010.635298
## iter  90 value 4958.840060
## iter 100 value 4769.318765
## final  value 4769.318765 
## stopped after 100 iterations
## # weights:  551
## initial  value 6156.362291 
## iter  10 value 5827.000790
## iter  20 value 5824.039923
## iter  30 value 5344.130053
## iter  40 value 5187.183619
## iter  50 value 5105.106377
## iter  60 value 5010.472002
## iter  70 value 4536.604954
## iter  80 value 3683.564289
## iter  90 value 3554.356659
## iter 100 value 3451.084757
## final  value 3451.084757 
## stopped after 100 iterations
## # weights:  771
## initial  value 5939.768141 
## iter  10 value 5403.656511
## iter  20 value 5341.897310
## iter  30 value 5327.879159
## iter  40 value 5318.546460
## iter  50 value 5310.170903
## iter  50 value 5310.170860
## iter  60 value 5303.911885
## iter  70 value 5292.448352
## iter  80 value 5255.731739
## iter  90 value 4986.878355
## iter 100 value 4925.022304
## final  value 4925.022304 
## stopped after 100 iterations
## # weights:  221
## initial  value 8258.255589 
## iter  10 value 5830.549283
## iter  20 value 5594.505010
## iter  30 value 5494.365984
## iter  40 value 5416.801063
## iter  50 value 5260.545854
## iter  60 value 5226.171879
## iter  70 value 5200.479706
## iter  80 value 5189.483636
## iter  90 value 5165.303682
## iter 100 value 5113.398499
## final  value 5113.398499 
## stopped after 100 iterations
## # weights:  331
## initial  value 5954.211205 
## iter  10 value 5735.372298
## iter  20 value 5379.369366
## iter  30 value 5209.627118
## iter  40 value 5088.987553
## iter  50 value 5067.861080
## iter  60 value 4857.912386
## iter  70 value 4657.365836
## iter  80 value 4340.906132
## iter  90 value 3940.330988
## iter 100 value 3777.015592
## final  value 3777.015592 
## stopped after 100 iterations
## # weights:  551
## initial  value 5904.773617 
## iter  10 value 5360.180271
## iter  20 value 5237.140957
## iter  30 value 5198.874399
## iter  40 value 5168.369076
## iter  50 value 5152.557600
## iter  60 value 5117.308701
## iter  70 value 5079.444046
## iter  80 value 5055.979210
## iter  90 value 5011.820062
## iter 100 value 4967.734737
## final  value 4967.734737 
## stopped after 100 iterations
## # weights:  771
## initial  value 6811.759369 
## iter  10 value 5615.923512
## iter  20 value 5370.547746
## iter  30 value 5265.213283
## iter  40 value 5241.215139
## iter  50 value 5229.833420
## iter  60 value 5220.882839
## iter  70 value 5197.739559
## iter  80 value 5120.996718
## iter  90 value 5029.203743
## iter 100 value 5012.711509
## final  value 5012.711509 
## stopped after 100 iterations
## # weights:  221
## initial  value 7289.737966 
## iter  10 value 5634.553356
## iter  20 value 5425.952790
## iter  30 value 5240.199337
## iter  40 value 5115.857597
## iter  50 value 5006.891952
## iter  60 value 4927.928523
## iter  70 value 4832.169442
## iter  80 value 4672.859151
## iter  90 value 3951.081596
## iter 100 value 3798.484431
## final  value 3798.484431 
## stopped after 100 iterations
## # weights:  331
## initial  value 6298.035331 
## iter  10 value 5470.177218
## iter  20 value 5373.344217
## iter  30 value 5280.713412
## iter  40 value 5184.828443
## iter  50 value 5162.871260
## iter  60 value 5024.389911
## iter  70 value 4823.797658
## iter  80 value 4441.558408
## iter  90 value 3888.155514
## iter 100 value 3583.621289
## final  value 3583.621289 
## stopped after 100 iterations
## # weights:  551
## initial  value 9515.677817 
## iter  10 value 5637.707632
## iter  20 value 5196.656240
## iter  30 value 5182.139262
## iter  40 value 5166.925578
## iter  50 value 5131.688468
## iter  60 value 4820.998390
## iter  70 value 4230.497906
## iter  80 value 3878.872060
## iter  90 value 3508.988663
## iter 100 value 3349.886175
## final  value 3349.886175 
## stopped after 100 iterations
## # weights:  771
## initial  value 12262.757594 
## iter  10 value 5338.300624
## iter  20 value 5290.037819
## iter  30 value 5281.911666
## iter  40 value 5279.537993
## iter  50 value 5278.708911
## iter  60 value 5275.530593
## iter  70 value 5273.626062
## iter  80 value 5273.449319
## iter  90 value 5273.418933
## iter 100 value 5273.132851
## final  value 5273.132851 
## stopped after 100 iterations
## # weights:  221
## initial  value 8130.417557 
## iter  10 value 5441.223758
## iter  20 value 5348.547086
## iter  30 value 5301.801866
## iter  40 value 5224.124080
## iter  50 value 5159.270973
## iter  60 value 5142.092282
## iter  70 value 5038.744123
## iter  80 value 4687.602362
## iter  90 value 4068.393684
## iter 100 value 3752.209078
## final  value 3752.209078 
## stopped after 100 iterations
## # weights:  331
## initial  value 6789.940751 
## iter  10 value 5828.503300
## iter  20 value 5319.955275
## iter  30 value 5259.534602
## iter  40 value 5228.223293
## iter  50 value 5169.642494
## iter  60 value 5146.234919
## iter  70 value 5132.383233
## iter  80 value 5123.182417
## iter  90 value 5105.148773
## iter 100 value 5093.434348
## final  value 5093.434348 
## stopped after 100 iterations
## # weights:  551
## initial  value 9931.498561 
## iter  10 value 5606.695441
## iter  20 value 5252.040874
## iter  30 value 5228.079724
## iter  40 value 5170.601395
## iter  50 value 5050.800364
## iter  60 value 4997.838110
## iter  70 value 4988.223297
## iter  80 value 4965.089046
## iter  90 value 4843.971904
## iter 100 value 4584.092429
## final  value 4584.092429 
## stopped after 100 iterations
## # weights:  771
## initial  value 5993.923515 
## iter  10 value 5651.488522
## iter  20 value 5206.429488
## iter  30 value 5177.369346
## iter  40 value 5157.607449
## iter  50 value 5111.819517
## iter  60 value 5085.513149
## iter  70 value 5032.791467
## iter  80 value 5002.089594
## iter  90 value 4978.042661
## iter 100 value 4970.370896
## final  value 4970.370896 
## stopped after 100 iterations
## # weights:  221
## initial  value 6001.192249 
## iter  10 value 5831.643987
## iter  20 value 5673.980941
## iter  30 value 5304.104590
## iter  40 value 5221.422321
## iter  50 value 5172.007336
## iter  60 value 5068.635060
## iter  70 value 5038.969415
## iter  80 value 4977.149016
## iter  90 value 4845.949872
## iter 100 value 4585.122560
## final  value 4585.122560 
## stopped after 100 iterations
## # weights:  331
## initial  value 8598.513167 
## iter  10 value 5830.225147
## iter  20 value 5823.738143
## iter  30 value 5310.763103
## iter  40 value 5309.076320
## iter  50 value 5233.145139
## iter  60 value 5227.981022
## iter  70 value 5225.301041
## iter  80 value 5126.895117
## iter  90 value 5112.086497
## iter 100 value 5106.153433
## final  value 5106.153433 
## stopped after 100 iterations
## # weights:  551
## initial  value 7719.113823 
## iter  10 value 5717.211894
## iter  20 value 5345.221032
## iter  30 value 5294.653976
## iter  40 value 5173.472582
## iter  50 value 5159.384127
## iter  60 value 5144.735077
## iter  70 value 5126.889711
## iter  80 value 5120.410588
## iter  90 value 5095.362328
## iter 100 value 5035.538761
## final  value 5035.538761 
## stopped after 100 iterations
## # weights:  771
## initial  value 5921.163662 
## iter  10 value 5832.005899
## iter  20 value 5795.602556
## iter  30 value 5419.215000
## iter  40 value 5120.697459
## iter  50 value 5110.017843
## iter  60 value 5105.865317
## iter  70 value 5099.848894
## iter  80 value 5047.302233
## iter  90 value 4894.789169
## iter 100 value 4706.273382
## final  value 4706.273382 
## stopped after 100 iterations
## # weights:  551
## initial  value 9601.049679 
## iter  10 value 8597.339241
## iter  20 value 8419.280458
## iter  30 value 8392.290174
## iter  40 value 8199.473271
## iter  50 value 8015.764802
## iter  60 value 7758.171208
## iter  70 value 7750.313576
## iter  80 value 7721.666428
## iter  90 value 7704.991074
## iter 100 value 7679.384189
## final  value 7679.384189 
## stopped after 100 iterations
# Show the tuning summary of the fitted model (resampled accuracy and kappa
# per size/decay combination, and the selected values).
print(Adult_TDA_KDE_5.60.5_n1_NN1Fit0)
## Neural Network 
## 
## 15260 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 10173, 10174, 10173 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa    
##   2     0.3    0.8206424  0.4694431
##   2     0.5    0.8199864  0.5252703
##   2     0.7    0.7922017  0.3551556
##   3     0.3    0.8145472  0.4378342
##   3     0.5    0.8127116  0.4158853
##   3     0.7    0.8157295  0.4272610
##   5     0.3    0.8250326  0.5076988
##   5     0.5    0.8149434  0.4802810
##   5     0.7    0.8010483  0.3518693
##   7     0.3    0.8015065  0.3862242
##   7     0.5    0.7974439  0.3319158
##   7     0.7    0.8086492  0.4435108
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 5 and decay = 0.3.
# Per-fold accuracy and kappa recorded for the fitted model.
print(Adult_TDA_KDE_5.60.5_n1_NN1Fit0$resample)
##    Accuracy     Kappa Resample
## 1 0.8057794 0.3726672    Fold1
## 2 0.8472577 0.5928479    Fold3
## 3 0.8220606 0.5575812    Fold2
# Store the per-fold accuracies as a one-column data frame. The column is
# selected by name rather than by position 1, so the extraction does not
# depend on the column order of `resample`.
# NOTE(review): the rows of `resample` are listed as Fold1, Fold3, Fold2 and
# the fold label is dropped here; a position-wise subtraction against another
# model's resamples may therefore pair different folds. Confirm.
ad_tda_kde_5.60.5_n1_nn1_fit_re <-
  Adult_TDA_KDE_5.60.5_n1_NN1Fit0$resample["Accuracy"]

# Print the architecture and weights of the selected network.
summary(Adult_TDA_KDE_5.60.5_n1_NN1Fit0)
## a 108-5-1 network with 551 weights
## options were - entropy fitting  decay=0.3
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.00     0.00     0.00    -0.01     0.00     0.00     0.00     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##     0.01     0.53     0.00     0.00     0.00     0.00    -0.01     0.00 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##     0.00     0.00    -0.01     0.00     0.00     0.00     0.00    -0.01 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.00     0.01     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00    -0.01     0.00 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     0.00     0.00     0.00     0.00     0.01     0.00     0.00     0.00 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##     0.00     0.00     0.00     0.00     0.00     0.01     0.00     0.00 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##    -0.12     0.70     0.27     0.00     0.00     0.00     0.00     0.00 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.00     0.00     0.01     0.00     0.00 
##    b->h3   i1->h3   i2->h3   i3->h3   i4->h3   i5->h3   i6->h3   i7->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h3   i9->h3  i10->h3  i11->h3  i12->h3  i13->h3  i14->h3  i15->h3 
##     0.00     0.00     0.00     0.01     0.00     0.00     0.00     0.00 
##  i16->h3  i17->h3  i18->h3  i19->h3  i20->h3  i21->h3  i22->h3  i23->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h3  i25->h3  i26->h3  i27->h3  i28->h3  i29->h3  i30->h3  i31->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h3  i33->h3  i34->h3  i35->h3  i36->h3  i37->h3  i38->h3  i39->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h3  i41->h3  i42->h3  i43->h3  i44->h3  i45->h3  i46->h3  i47->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h3  i49->h3  i50->h3  i51->h3  i52->h3  i53->h3  i54->h3  i55->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h3  i57->h3  i58->h3  i59->h3  i60->h3  i61->h3  i62->h3  i63->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h3  i65->h3  i66->h3  i67->h3  i68->h3  i69->h3  i70->h3  i71->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h3  i73->h3  i74->h3  i75->h3  i76->h3  i77->h3  i78->h3  i79->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h3  i81->h3  i82->h3  i83->h3  i84->h3  i85->h3  i86->h3  i87->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h3  i89->h3  i90->h3  i91->h3  i92->h3  i93->h3  i94->h3  i95->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h3  i97->h3  i98->h3  i99->h3 i100->h3 i101->h3 i102->h3 i103->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h4   i1->h4   i2->h4   i3->h4   i4->h4   i5->h4   i6->h4   i7->h4 
##    -0.09    -1.12    -0.13    -0.06     0.04     0.00    -0.01     0.10 
##   i8->h4   i9->h4  i10->h4  i11->h4  i12->h4  i13->h4  i14->h4  i15->h4 
##     0.01    -0.04     0.00     0.00    -0.05     0.00     0.00     0.00 
##  i16->h4  i17->h4  i18->h4  i19->h4  i20->h4  i21->h4  i22->h4  i23->h4 
##     0.00     0.00     0.00    -0.04    -0.06    -0.24     0.00     0.23 
##  i24->h4  i25->h4  i26->h4  i27->h4  i28->h4  i29->h4  i30->h4  i31->h4 
##     0.12     0.00    -0.03    -0.03    -1.51    -0.05     0.00     0.22 
##  i32->h4  i33->h4  i34->h4  i35->h4  i36->h4  i37->h4  i38->h4  i39->h4 
##     0.00    -0.25     0.00    -0.02    -0.13     0.01     0.00     0.05 
##  i40->h4  i41->h4  i42->h4  i43->h4  i44->h4  i45->h4  i46->h4  i47->h4 
##     0.25     0.01    -0.03     0.01     0.02     0.00    -0.13     0.00 
##  i48->h4  i49->h4  i50->h4  i51->h4  i52->h4  i53->h4  i54->h4  i55->h4 
##     0.04    -0.18     0.00     0.22    -0.25     0.00    -0.08    -0.05 
##  i56->h4  i57->h4  i58->h4  i59->h4  i60->h4  i61->h4  i62->h4  i63->h4 
##     0.07    -0.05     0.02     0.00     0.00    -0.06    -0.17     0.08 
##  i64->h4  i65->h4  i66->h4  i67->h4  i68->h4  i69->h4  i70->h4  i71->h4 
##     0.04     0.32     0.94    -0.06     0.00     0.00     0.00     0.00 
##  i72->h4  i73->h4  i74->h4  i75->h4  i76->h4  i77->h4  i78->h4  i79->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h4  i81->h4  i82->h4  i83->h4  i84->h4  i85->h4  i86->h4  i87->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h4  i89->h4  i90->h4  i91->h4  i92->h4  i93->h4  i94->h4  i95->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h4  i97->h4  i98->h4  i99->h4 i100->h4 i101->h4 i102->h4 i103->h4 
##     0.00     0.01     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4 
##     0.00     0.00    -0.04     0.00     0.00 
##    b->h5   i1->h5   i2->h5   i3->h5   i4->h5   i5->h5   i6->h5   i7->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h5   i9->h5  i10->h5  i11->h5  i12->h5  i13->h5  i14->h5  i15->h5 
##     0.00     0.00     0.00    -0.03     0.00     0.00     0.00     0.00 
##  i16->h5  i17->h5  i18->h5  i19->h5  i20->h5  i21->h5  i22->h5  i23->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h5  i25->h5  i26->h5  i27->h5  i28->h5  i29->h5  i30->h5  i31->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h5  i33->h5  i34->h5  i35->h5  i36->h5  i37->h5  i38->h5  i39->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h5  i41->h5  i42->h5  i43->h5  i44->h5  i45->h5  i46->h5  i47->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h5  i49->h5  i50->h5  i51->h5  i52->h5  i53->h5  i54->h5  i55->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h5  i57->h5  i58->h5  i59->h5  i60->h5  i61->h5  i62->h5  i63->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h5  i65->h5  i66->h5  i67->h5  i68->h5  i69->h5  i70->h5  i71->h5 
##    -0.03    -0.05     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h5  i73->h5  i74->h5  i75->h5  i76->h5  i77->h5  i78->h5  i79->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h5  i81->h5  i82->h5  i83->h5  i84->h5  i85->h5  i86->h5  i87->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h5  i89->h5  i90->h5  i91->h5  i92->h5  i93->h5  i94->h5  i95->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h5  i97->h5  i98->h5  i99->h5 i100->h5 i101->h5 i102->h5 i103->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5 
##     0.00     0.00     0.00     0.00     0.00 
##  b->o h1->o h2->o h3->o h4->o h5->o 
##  0.18 -0.01 -1.85  0.25  1.73  0.13
# Variable-importance plot for the n1 TDA/KDE neural-network fit, limited to
# the 50 highest-ranked features. Title typo ("Assited") corrected.
vip(Adult_TDA_KDE_5.60.5_n1_NN1Fit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.60.5_n1_NN1Fit TDA-Assisted NN")

# Score the held-out test rows with the n1 TDA/KDE network fitted above.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n1_NN1Fit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the test-set predictions against the reference labels
# (adult_df1 column of the test data) and print the resulting statistics.
ad_tda_kde_5.60.5_n1_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n1_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7095  1645
##      >50K     321   707
##                                           
##                Accuracy : 0.7987          
##                  95% CI : (0.7906, 0.8066)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.3185          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9567          
##             Specificity : 0.3006          
##          Pos Pred Value : 0.8118          
##          Neg Pred Value : 0.6877          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7264          
##    Detection Prevalence : 0.8948          
##       Balanced Accuracy : 0.6287          
##                                           
##        'Positive' Class :  <=50K          
## 
# Show the same confusion matrix once more (identical to the printout above).
print(ad_tda_kde_5.60.5_n1_nn1_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7095  1645
##      >50K     321   707
##                                           
##                Accuracy : 0.7987          
##                  95% CI : (0.7906, 0.8066)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.3185          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9567          
##             Specificity : 0.3006          
##          Pos Pred Value : 0.8118          
##          Neg Pred Value : 0.6877          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7264          
##    Detection Prevalence : 0.8948          
##       Balanced Accuracy : 0.6287          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the confusion matrix (accuracy, kappa, CI, p-values).
ad_tda_kde_5.60.5_n1_nn1_cf0[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.987305e-01   3.185308e-01   7.906396e-01   8.066430e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   6.604999e-21  1.260623e-195
# Keep the test-set accuracy (first entry of `overall`, named "Accuracy").
ad_tda_kde_5.60.5_n1_nn1_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n1_nn1_cf0[["overall"]]["Accuracy"]
# Per-class statistics (sensitivity, specificity, precision, recall, F1, ...).
ad_tda_kde_5.60.5_n1_nn1_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9567152            0.3005952            0.8117849 
##       Neg Pred Value            Precision               Recall 
##            0.6877432            0.8117849            0.9567152 
##                   F1           Prevalence       Detection Rate 
##            0.8783115            0.7592138            0.7263514 
## Detection Prevalence    Balanced Accuracy 
##            0.8947584            0.6286552
# Keep precision, recall and F1 for later comparison. They are selected by
# name instead of the positional range 5:7 so the extraction does not silently
# pick other statistics if the ordering of `byClass` differs.
ad_tda_kde_5.60.5_n1_nn1_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n1_nn1_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers

### 3-fold diff

# Per-fold accuracy gap: ad_nn1_fit_re minus the TDA-assisted n1 fit
# (presumably the non-TDA baseline comes first, so positive = baseline ahead).
# NOTE(review): the subtraction pairs rows by position -- confirm both
# resample tables list the folds in the same order.
diff_tda_kde_5.60.5_nn1_n1_3_fold <-
  ad_nn1_fit_re - ad_tda_kde_5.60.5_n1_nn1_fit_re
diff_tda_kde_5.60.5_nn1_n1_3_fold
##       Accuracy
## 1  0.002171069
## 2  0.002834414
## 3 -0.020665454
## Bayesian Tests 3-fold diff

# Bayesian sign test on the fold differences; -0.01 and 0.01 are presumably
# the lower/upper ROPE bounds (result reports left / rope / right).
bst_tda_kde_5.60.5_nn1.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nn1_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_nn1.n1_3_fold
## $probLeft
## [1] 0.25
## 
## $probRope
## [1] 0.75
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the sign test above.
# probRight is 0 in this run, so the ratio evaluates to Inf.
bst_tda_kde_5.60.5_nn1.n1_3_fold_odds.left <-
  bst_tda_kde_5.60.5_nn1.n1_3_fold[["probLeft"]] /
    bst_tda_kde_5.60.5_nn1.n1_3_fold[["probRight"]]
bst_tda_kde_5.60.5_nn1.n1_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the fold differences, called with the same
# -0.01 / 0.01 bounds as the sign test.
bsr_tda_kde_5.60.5_nn1.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nn1_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_nn1.n1_3_fold
## $winLeft
## [1] 0.09123333
## 
## $winRope
## [1] 0.9087667
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the fold differences.
# NOTE(review): 0.1 is presumably the assumed correlation between folds and
# -0.01 / 0.01 the ROPE bounds -- confirm against the function definition.
bct_tda_kde_5.60.5_nn1.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nn1_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_nn1.n1_3_fold
## $left
## [1] 0.3228351
## 
## $rope
## [1] 0.5621235
## 
## $right
## [1] 0.1150414
# Plot the ROPE summary of the fold differences for the range [-0.01, 0.01].
plot(
  rope(diff_tda_kde_5.60.5_nn1_n1_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.60.5_nn1.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nn1_n1_3_fold))
#bf_tda_kde_5.60.5_nn1.n1_3_fold

# Frequentist reference: one-sample t-test of the fold differences against 0.
t.test(x = as.matrix(diff_tda_kde_5.60.5_nn1_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.60.5_nn1_n1_3_fold)
## t = -0.67572, df = 2, p-value = 0.5689
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.03845844  0.02801846
## sample estimates:
##    mean of x 
## -0.005219991
### Test set diff
# Test-set accuracy gap: nn1_cf_ov_acc minus the TDA-assisted n1 accuracy.
# This is a single number, so the tests below operate on one observation.
diff_tda_kde_5.60.5_nn1.n1_test <-
  nn1_cf_ov_acc - ad_tda_kde_5.60.5_n1_nn1_cf0_ov_acc
diff_tda_kde_5.60.5_nn1.n1_test
##    Accuracy 
## 0.006347256
## Bayesian Tests Test set diff

# Bayesian sign test on the single test-set difference, called with the same
# -0.01 / 0.01 bounds as the 3-fold version.
bst_tda_kde_5.60.5_nn1.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nn1.n1_test),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_nn1.n1_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the test-set sign test.
# Both probabilities are 0 in this run, so 0 / 0 evaluates to NaN.
bst_tda_kde_5.60.5_nn1.n1_test_odds.left <-
  bst_tda_kde_5.60.5_nn1.n1_test[["probLeft"]] /
    bst_tda_kde_5.60.5_nn1.n1_test[["probRight"]]
bst_tda_kde_5.60.5_nn1.n1_test_odds.left
## [1] NaN
# Bayesian signed-rank test on the single test-set difference.
bsr_tda_kde_5.60.5_nn1.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nn1.n1_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_nn1.n1_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the single test-set difference.
# All three results come back NA in this run -- presumably because a spread
# cannot be estimated from one observation; TODO confirm.
bct_tda_kde_5.60.5_nn1.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nn1.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_nn1.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_nn1.n1_test,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.60.5_nn1.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nn1.n1_test))
#bf_tda_kde_5.60.5_nn1.n1_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_nn1.n1_test))

##Node2

# Neural Network 1: tune an nnet classifier for adult_df1 through caret,
# searching nn1Grid under the resampling scheme in fitControl and selecting
# the candidate with the highest Accuracy.
# NOTE(review): this fit is labelled n2 / Node2 but trains on
# tda.m_kde_adult_5.60.5.n3.vec -- confirm that is the intended data set.
# NOTE(review): the training log shows nearly every fit stopping at 100
# iterations rather than converging; consider whether `maxit` should be raised.
Adult_TDA_KDE_5.60.5_n2_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n3.vec,
  # Spelled out as TRUE: `T` is an ordinary variable that can be reassigned.
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 5193.340449 
## iter  10 value 4936.486372
## iter  20 value 4844.741276
## iter  30 value 4830.134491
## iter  40 value 4794.468352
## iter  50 value 4779.590392
## iter  60 value 4714.816033
## iter  70 value 4705.687500
## iter  80 value 4680.198096
## iter  90 value 4372.951538
## iter 100 value 3941.728216
## final  value 3941.728216 
## stopped after 100 iterations
## # weights:  331
## initial  value 7147.332727 
## iter  10 value 5158.915513
## iter  20 value 4893.940032
## iter  30 value 4823.342876
## iter  40 value 4793.924642
## iter  50 value 4736.573509
## iter  60 value 4613.140184
## iter  70 value 4004.594248
## iter  80 value 3662.764537
## iter  90 value 3595.136690
## iter 100 value 3239.958251
## final  value 3239.958251 
## stopped after 100 iterations
## # weights:  551
## initial  value 7324.448618 
## iter  10 value 5066.739781
## iter  20 value 4841.833049
## iter  30 value 4797.676440
## iter  40 value 4795.580504
## iter  50 value 4794.822404
## iter  60 value 4789.260131
## iter  70 value 4779.284198
## iter  80 value 4706.508311
## iter  90 value 4681.143599
## iter 100 value 4673.577097
## final  value 4673.577097 
## stopped after 100 iterations
## # weights:  771
## initial  value 7151.551467 
## iter  10 value 5123.034709
## iter  20 value 4763.933946
## iter  30 value 4748.088757
## iter  40 value 4711.083186
## iter  50 value 4311.244498
## iter  60 value 3721.104523
## iter  70 value 3624.054322
## iter  80 value 3591.536906
## iter  90 value 3462.683481
## iter 100 value 3254.113763
## final  value 3254.113763 
## stopped after 100 iterations
## # weights:  221
## initial  value 6838.078807 
## iter  10 value 5160.912642
## iter  20 value 5077.777187
## iter  30 value 4941.969937
## iter  40 value 4834.825668
## iter  50 value 4818.903548
## iter  60 value 4726.838183
## iter  70 value 4712.533102
## iter  80 value 4707.113203
## iter  90 value 4681.248898
## iter 100 value 4664.040955
## final  value 4664.040955 
## stopped after 100 iterations
## # weights:  331
## initial  value 11964.773668 
## iter  10 value 5130.180590
## iter  20 value 4832.842977
## iter  30 value 4807.260122
## iter  40 value 4805.439588
## iter  50 value 4789.400369
## iter  60 value 4759.969474
## iter  70 value 4716.229271
## iter  80 value 4600.564784
## iter  90 value 4583.165721
## iter 100 value 4490.959696
## final  value 4490.959696 
## stopped after 100 iterations
## # weights:  551
## initial  value 5273.529226 
## iter  10 value 5067.295964
## iter  20 value 4839.782196
## iter  30 value 4832.813048
## iter  40 value 4801.888379
## iter  50 value 4732.213867
## iter  60 value 4411.982236
## iter  70 value 3954.920346
## iter  80 value 3302.558754
## iter  90 value 3192.890814
## iter 100 value 3084.061866
## final  value 3084.061866 
## stopped after 100 iterations
## # weights:  771
## initial  value 5294.706477 
## iter  10 value 5136.277202
## iter  20 value 5134.544425
## iter  30 value 4813.788364
## iter  40 value 4768.744374
## iter  50 value 4721.001401
## iter  60 value 4703.302866
## iter  70 value 4688.402379
## iter  80 value 4671.581772
## iter  90 value 4646.397627
## iter 100 value 4538.430739
## final  value 4538.430739 
## stopped after 100 iterations
## # weights:  221
## initial  value 7904.679838 
## iter  10 value 4815.944666
## iter  20 value 4803.940466
## iter  30 value 4803.540328
## iter  40 value 4796.915599
## iter  50 value 4786.035359
## iter  60 value 4778.213282
## iter  70 value 4742.426035
## iter  80 value 4697.225228
## iter  90 value 4596.155150
## iter 100 value 4146.536997
## final  value 4146.536997 
## stopped after 100 iterations
## # weights:  331
## initial  value 6397.609936 
## iter  10 value 4821.627161
## iter  20 value 4801.658705
## iter  30 value 4754.086448
## iter  40 value 4703.677015
## iter  50 value 4619.991050
## iter  60 value 4417.985333
## iter  70 value 4330.898517
## iter  80 value 3965.917382
## iter  90 value 3701.783502
## iter 100 value 3236.982201
## final  value 3236.982201 
## stopped after 100 iterations
## # weights:  551
## initial  value 7441.814324 
## iter  10 value 5095.197020
## iter  20 value 4823.080663
## iter  30 value 4733.917656
## iter  40 value 4727.343180
## iter  50 value 4713.973545
## iter  60 value 4699.670551
## iter  70 value 4696.552468
## iter  80 value 4674.110204
## iter  90 value 4669.871935
## iter 100 value 4646.297447
## final  value 4646.297447 
## stopped after 100 iterations
## # weights:  771
## initial  value 6868.844668 
## iter  10 value 4818.271878
## iter  20 value 4801.885403
## iter  30 value 4798.159632
## iter  40 value 4797.295944
## iter  50 value 4777.870453
## iter  60 value 4759.718274
## iter  70 value 4322.111199
## iter  80 value 4052.290787
## iter  90 value 3877.756160
## iter 100 value 3443.280164
## final  value 3443.280164 
## stopped after 100 iterations
## # weights:  221
## initial  value 5341.705882 
## iter  10 value 5160.126821
## final  value 5160.125276 
## converged
## # weights:  331
## initial  value 5356.349022 
## iter  10 value 4926.333955
## iter  20 value 4791.889137
## iter  30 value 4706.373182
## iter  40 value 4646.062434
## iter  50 value 4626.187041
## iter  60 value 4616.193243
## iter  70 value 4124.730453
## iter  80 value 3918.607043
## iter  90 value 3825.476854
## iter 100 value 3296.623988
## final  value 3296.623988 
## stopped after 100 iterations
## # weights:  551
## initial  value 5582.218251 
## iter  10 value 5097.532608
## iter  20 value 4847.037718
## iter  30 value 4793.832308
## iter  40 value 4788.268120
## iter  50 value 4776.004739
## iter  60 value 4764.702499
## iter  70 value 4690.542387
## iter  80 value 4389.832050
## iter  90 value 3925.222631
## iter 100 value 3704.599487
## final  value 3704.599487 
## stopped after 100 iterations
## # weights:  771
## initial  value 5419.344378 
## iter  10 value 4988.139885
## iter  20 value 4762.631292
## iter  30 value 4719.358744
## iter  40 value 4631.668806
## iter  50 value 4582.929311
## iter  60 value 4571.341273
## iter  70 value 4565.228720
## iter  80 value 4559.048608
## iter  90 value 4555.473992
## iter 100 value 4553.726889
## final  value 4553.726889 
## stopped after 100 iterations
## # weights:  221
## initial  value 6400.133975 
## iter  10 value 5143.324076
## iter  20 value 4821.649184
## iter  30 value 4797.894805
## iter  40 value 4741.454961
## iter  50 value 4354.089069
## iter  60 value 4221.587732
## iter  70 value 4125.257152
## iter  80 value 4041.612482
## iter  90 value 3967.248627
## iter 100 value 3797.094306
## final  value 3797.094306 
## stopped after 100 iterations
## # weights:  331
## initial  value 5529.957657 
## iter  10 value 5052.941043
## iter  20 value 4848.553977
## iter  30 value 4709.033788
## iter  40 value 4641.020100
## iter  50 value 4511.299269
## iter  60 value 4186.138737
## iter  70 value 3566.320316
## iter  80 value 3414.131038
## iter  90 value 3247.262659
## iter 100 value 3153.837504
## final  value 3153.837504 
## stopped after 100 iterations
## # weights:  551
## initial  value 5333.432842 
## iter  10 value 4982.494817
## iter  20 value 4826.021216
## iter  30 value 4784.417052
## iter  40 value 4754.122634
## iter  50 value 4712.852057
## iter  60 value 4620.262928
## iter  70 value 4590.756844
## iter  80 value 4216.767024
## iter  90 value 3670.653749
## iter 100 value 3332.828725
## final  value 3332.828725 
## stopped after 100 iterations
## # weights:  771
## initial  value 5238.445550 
## iter  10 value 4852.480752
## iter  20 value 4762.834198
## iter  30 value 4735.047808
## iter  40 value 4712.507362
## iter  50 value 4451.755393
## iter  60 value 4059.211299
## iter  70 value 3743.523197
## iter  80 value 3450.717152
## iter  90 value 3415.328569
## iter 100 value 3390.430494
## final  value 3390.430494 
## stopped after 100 iterations
## # weights:  221
## initial  value 5689.631784 
## iter  10 value 4941.695197
## iter  20 value 4788.243538
## iter  30 value 4783.501385
## iter  40 value 4761.743598
## iter  50 value 4669.543709
## iter  60 value 4441.320855
## iter  70 value 4212.746885
## iter  80 value 3960.157138
## iter  90 value 3822.855600
## iter 100 value 3539.608561
## final  value 3539.608561 
## stopped after 100 iterations
## # weights:  331
## initial  value 5657.018857 
## iter  10 value 5162.438775
## iter  20 value 5160.812570
## iter  30 value 5160.767320
## iter  40 value 5137.002767
## iter  50 value 5117.281323
## iter  60 value 4797.232724
## iter  70 value 4773.863013
## iter  80 value 4483.113148
## iter  90 value 3551.311945
## iter 100 value 3296.557869
## final  value 3296.557869 
## stopped after 100 iterations
## # weights:  551
## initial  value 5350.486777 
## iter  10 value 5076.776642
## iter  20 value 4840.826813
## iter  30 value 4791.753361
## iter  40 value 4781.103999
## iter  50 value 4759.041674
## iter  60 value 4723.434649
## iter  70 value 4712.893127
## iter  80 value 4709.675263
## iter  90 value 4654.553912
## iter 100 value 4635.112804
## final  value 4635.112804 
## stopped after 100 iterations
## # weights:  771
## initial  value 7650.707541 
## iter  10 value 5183.093941
## iter  20 value 4998.571804
## iter  30 value 4811.252854
## iter  40 value 4796.952232
## iter  50 value 4786.777092
## iter  60 value 4758.074763
## iter  70 value 4730.594379
## iter  80 value 4698.267288
## iter  90 value 4363.759523
## iter 100 value 3862.807015
## final  value 3862.807015 
## stopped after 100 iterations
## # weights:  221
## initial  value 6037.956388 
## iter  10 value 5107.718980
## iter  20 value 4733.300557
## iter  30 value 4711.567219
## iter  40 value 4707.946793
## iter  50 value 4697.979141
## iter  60 value 4675.726429
## iter  70 value 4630.521228
## iter  80 value 4536.668557
## iter  90 value 3978.526732
## iter 100 value 3740.321946
## final  value 3740.321946 
## stopped after 100 iterations
## # weights:  331
## initial  value 8216.978974 
## iter  10 value 5106.709779
## iter  20 value 4792.735224
## iter  30 value 4747.378126
## iter  40 value 4292.090004
## iter  50 value 4020.017917
## iter  60 value 3925.577641
## iter  70 value 3872.731754
## iter  80 value 3772.225544
## iter  90 value 3401.112503
## iter 100 value 3225.718291
## final  value 3225.718291 
## stopped after 100 iterations
## # weights:  551
## initial  value 5330.839724 
## iter  10 value 5158.508668
## iter  20 value 4807.851912
## iter  30 value 4707.242807
## iter  40 value 4680.219031
## iter  50 value 4660.352779
## iter  60 value 4656.808708
## iter  70 value 4635.733650
## iter  80 value 4618.541823
## iter  90 value 4610.894474
## iter 100 value 4465.335788
## final  value 4465.335788 
## stopped after 100 iterations
## # weights:  771
## initial  value 7217.185246 
## iter  10 value 4862.247406
## iter  20 value 4795.674221
## iter  30 value 4731.386274
## iter  40 value 4552.366713
## iter  50 value 4443.204867
## iter  60 value 4103.096979
## iter  70 value 3559.348937
## iter  80 value 3375.076505
## iter  90 value 3276.777831
## iter 100 value 3173.772519
## final  value 3173.772519 
## stopped after 100 iterations
## # weights:  221
## initial  value 5762.169066 
## iter  10 value 5158.125798
## iter  20 value 4826.106826
## iter  30 value 4778.151924
## iter  40 value 4654.168871
## iter  50 value 3927.748750
## iter  60 value 3312.012336
## iter  70 value 3145.847559
## iter  80 value 3073.331961
## iter  90 value 3029.938307
## iter 100 value 3021.247846
## final  value 3021.247846 
## stopped after 100 iterations
## # weights:  331
## initial  value 5220.143561 
## iter  10 value 5160.723662
## iter  20 value 5159.722954
## iter  30 value 4952.593014
## iter  40 value 4744.635247
## iter  50 value 4719.890708
## iter  60 value 4704.506425
## iter  70 value 4701.153289
## iter  80 value 4580.711443
## iter  90 value 4330.909738
## iter 100 value 4017.274628
## final  value 4017.274628 
## stopped after 100 iterations
## # weights:  551
## initial  value 5213.083113 
## iter  10 value 4950.741088
## iter  20 value 4936.925383
## iter  30 value 4788.798323
## iter  40 value 4783.187105
## iter  50 value 4781.699014
## iter  60 value 4778.775773
## iter  70 value 4762.701892
## iter  80 value 4750.201202
## iter  90 value 4726.949517
## iter 100 value 4690.064954
## final  value 4690.064954 
## stopped after 100 iterations
## # weights:  771
## initial  value 7343.229216 
## iter  10 value 5123.928280
## iter  20 value 4827.110087
## iter  30 value 4760.516747
## iter  40 value 4668.119112
## iter  50 value 4621.684796
## iter  60 value 4590.786033
## iter  70 value 4519.177035
## iter  80 value 4505.074832
## iter  90 value 4438.961762
## iter 100 value 4218.812791
## final  value 4218.812791 
## stopped after 100 iterations
## # weights:  221
## initial  value 5650.898726 
## iter  10 value 5125.818662
## iter  20 value 4744.312223
## iter  30 value 4688.430884
## iter  40 value 4592.034474
## iter  50 value 4168.553132
## iter  60 value 3539.111899
## iter  70 value 3260.510140
## iter  80 value 3189.399400
## iter  90 value 3147.536207
## iter 100 value 3111.359474
## final  value 3111.359474 
## stopped after 100 iterations
## # weights:  331
## initial  value 5390.509193 
## iter  10 value 5133.195136
## iter  20 value 4802.421460
## iter  30 value 4784.097060
## iter  40 value 4782.973256
## iter  50 value 4771.048145
## iter  60 value 4678.901752
## iter  70 value 4675.218484
## iter  80 value 4668.407536
## iter  90 value 4662.533935
## iter 100 value 4660.091917
## final  value 4660.091917 
## stopped after 100 iterations
## # weights:  551
## initial  value 7580.374538 
## iter  10 value 5106.253122
## iter  20 value 4888.228691
## iter  30 value 4810.550139
## iter  40 value 4801.894520
## iter  50 value 4711.654011
## iter  60 value 4688.927610
## iter  70 value 4685.655642
## iter  80 value 4667.843694
## iter  90 value 4622.696503
## iter 100 value 4534.904029
## final  value 4534.904029 
## stopped after 100 iterations
## # weights:  771
## initial  value 6890.697079 
## iter  10 value 5149.981570
## iter  20 value 4862.015161
## iter  30 value 4820.558610
## iter  40 value 4802.707040
## iter  50 value 4704.379172
## iter  60 value 4679.051090
## iter  70 value 4678.896736
## iter  80 value 4671.380618
## iter  90 value 4668.331269
## iter 100 value 4666.111040
## final  value 4666.111040 
## stopped after 100 iterations
## # weights:  331
## initial  value 8295.375385 
## iter  10 value 7456.265098
## iter  20 value 7383.345089
## iter  30 value 7069.446093
## iter  40 value 7037.460188
## iter  50 value 7024.961832
## iter  60 value 7022.798127
## iter  70 value 7022.331042
## iter  80 value 7014.293025
## iter  90 value 6729.202377
## iter 100 value 5779.900414
## final  value 5779.900414 
## stopped after 100 iterations
# Resampling results across the tuning grid and the selected size / decay.
print(Adult_TDA_KDE_5.60.5_n2_NN1Fit0)
## Neural Network 
## 
## 13266 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8844, 8844, 8844 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa    
##   2     0.3    0.7721996  0.3282817
##   2     0.5    0.8033318  0.4517339
##   2     0.7    0.8002412  0.4691387
##   3     0.3    0.8249661  0.5626807
##   3     0.5    0.7913463  0.4642706
##   3     0.7    0.8028795  0.4715081
##   5     0.3    0.7689582  0.2538023
##   5     0.5    0.8074778  0.4525695
##   5     0.7    0.7708428  0.2524926
##   7     0.3    0.7979044  0.4159694
##   7     0.5    0.7947384  0.3558387
##   7     0.7    0.7801900  0.3189116
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 3 and decay = 0.3.
# Per-fold Accuracy and Kappa of the selected model.
Adult_TDA_KDE_5.60.5_n2_NN1Fit0[["resample"]]
##    Accuracy     Kappa Resample
## 1 0.8256445 0.5347792    Fold3
## 2 0.8276798 0.5828865    Fold2
## 3 0.8215739 0.5703763    Fold1
# Keep only the per-fold Accuracy column (first column of `resample`) as a
# one-column data frame.
# NOTE(review): the Resample labels are dropped here and the rows are listed
# as Fold3, Fold2, Fold1 -- confirm any later row-wise comparison against
# another fit uses the same fold order.
ad_tda_kde_5.60.5_n2_nn1_fit_re <-
  Adult_TDA_KDE_5.60.5_n2_NN1Fit0[["resample"]]["Accuracy"]

# Print the architecture and fitted weights of the selected network.
summary(object = Adult_TDA_KDE_5.60.5_n2_NN1Fit0)
## a 108-3-1 network with 331 weights
## options were - entropy fitting  decay=0.3
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.01    -0.83     0.05     0.11     0.03     0.00    -0.19    -0.02 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.07    -0.05     0.00     0.00     0.00    -0.06    -0.11     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00    -0.81     0.17     0.60     0.00    -0.30 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##    -0.26     0.00     0.00     0.78     0.09    -0.49     0.00    -0.60 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.00     1.17    -0.22     0.15     0.05     0.33     0.00     0.04 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.24    -0.17    -0.14    -0.21     0.14    -0.06    -0.72     0.04 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.46     0.11    -0.12     0.36     0.48    -0.02     0.26    -0.03 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##    -1.05    -0.06     0.24     0.00     0.00    -0.17    -0.98     0.99 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.13     0.40    -1.38     0.12     0.19     0.00     0.00     0.00 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.15     0.00     0.00    -0.07     0.00     0.00     0.06     0.00 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.13    -0.10     0.00     0.00     0.00     0.00     0.00     0.12 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.00     0.00     0.00     0.00     0.00     0.12     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.00    -0.72     0.00     0.00 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##     2.23     0.17     0.15    -1.08    -0.15     0.00     0.02     1.32 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##     0.44     1.43     0.09     0.00     0.00     0.67     2.00     0.00 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.00     0.00     0.00     1.27     1.28    -3.39     0.00     1.16 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##    -2.12     0.00     0.00     1.35     0.27    -6.28     0.23    -7.04 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     4.70     3.54     0.79     6.28     0.15     0.13    -0.03     0.75 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##    -2.45     2.25     1.67     1.66     1.69     0.19    -3.31    -0.03 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##    -1.15     0.37     0.32    -1.43    -2.62     5.76     4.27     3.83 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##    -7.59    -0.99    -0.59     2.80     0.36     0.65     4.14    -1.91 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##    -0.11    -0.68    -0.21    -0.14    -0.94    -0.52     0.09     0.81 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##    -0.23     0.24     0.00     0.10    -0.80    -0.63     1.09     0.67 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.24     0.51     0.01     0.01    -0.06     0.07    -0.76    -0.28 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##    -0.38     0.76     0.53     0.58     0.23     1.24     0.14    -0.20 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##    -0.14    -1.64     0.65    -0.10     0.60     0.38     0.27     0.45 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.03     0.29    -1.37     0.14     0.31 
##    b->h3   i1->h3   i2->h3   i3->h3   i4->h3   i5->h3   i6->h3   i7->h3 
##     0.01     0.88     0.00     0.00     0.02     0.00    -0.04     0.03 
##   i8->h3   i9->h3  i10->h3  i11->h3  i12->h3  i13->h3  i14->h3  i15->h3 
##    -0.01     0.01     0.00    -0.03     0.00    -0.07     0.00     0.00 
##  i16->h3  i17->h3  i18->h3  i19->h3  i20->h3  i21->h3  i22->h3  i23->h3 
##     0.00     0.00     0.00     0.00     0.00     0.09     0.00     0.00 
##  i24->h3  i25->h3  i26->h3  i27->h3  i28->h3  i29->h3  i30->h3  i31->h3 
##     0.00     0.00     0.00    -0.01     0.65     0.01     0.00     0.06 
##  i32->h3  i33->h3  i34->h3  i35->h3  i36->h3  i37->h3  i38->h3  i39->h3 
##     0.00    -0.05     0.00     0.00     0.00     0.02     0.00     0.00 
##  i40->h3  i41->h3  i42->h3  i43->h3  i44->h3  i45->h3  i46->h3  i47->h3 
##    -0.01     0.00     0.04    -0.07     0.00     0.00     0.01     0.00 
##  i48->h3  i49->h3  i50->h3  i51->h3  i52->h3  i53->h3  i54->h3  i55->h3 
##     0.03     0.00     0.00     0.06     0.02     0.00     0.00    -0.06 
##  i56->h3  i57->h3  i58->h3  i59->h3  i60->h3  i61->h3  i62->h3  i63->h3 
##     0.00     0.00     0.00     0.06     0.00    -0.05     0.02     0.00 
##  i64->h3  i65->h3  i66->h3  i67->h3  i68->h3  i69->h3  i70->h3  i71->h3 
##     0.84     0.00     2.06     0.04     0.00     0.00     0.00     0.00 
##  i72->h3  i73->h3  i74->h3  i75->h3  i76->h3  i77->h3  i78->h3  i79->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h3  i81->h3  i82->h3  i83->h3  i84->h3  i85->h3  i86->h3  i87->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h3  i89->h3  i90->h3  i91->h3  i92->h3  i93->h3  i94->h3  i95->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h3  i97->h3  i98->h3  i99->h3 i100->h3 i101->h3 i102->h3 i103->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3 
##     0.00     0.00    -0.03     0.00     0.00 
##  b->o h1->o h2->o h3->o 
## -0.23  1.01 -3.73  1.05
# Variable-importance plot for the n2 TDA-assisted neural net, limited to the
# 50 highest-ranked features. The feature count is passed by name so it cannot
# be mistaken for another positional argument.
vip(Adult_TDA_KDE_5.60.5_n2_NN1Fit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.60.5_n2_NN1Fit TDA-Assisted NN")

# Score the rows of adult.one_hot_df4Test with the fitted n2 model.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n2_NN1Fit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the observed adult_df1 labels.
# The printed summary reports ' <=50K' as the 'Positive' class, so the
# sensitivity / precision / recall / F1 figures describe that class.
ad_tda_kde_5.60.5_n2_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n2_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7143  1678
##      >50K     273   674
##                                           
##                Accuracy : 0.8003          
##                  95% CI : (0.7922, 0.8082)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.3137          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9632          
##             Specificity : 0.2866          
##          Pos Pred Value : 0.8098          
##          Neg Pred Value : 0.7117          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7313          
##    Detection Prevalence : 0.9031          
##       Balanced Accuracy : 0.6249          
##                                           
##        'Positive' Class :  <=50K          
## 
# Print the same confusion-matrix object a second time (identical to the
# summary already shown directly after it was created).
ad_tda_kde_5.60.5_n2_nn1_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7143  1678
##      >50K     273   674
##                                           
##                Accuracy : 0.8003          
##                  95% CI : (0.7922, 0.8082)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.3137          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9632          
##             Specificity : 0.2866          
##          Pos Pred Value : 0.8098          
##          Neg Pred Value : 0.7117          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7313          
##    Detection Prevalence : 0.9031          
##       Balanced Accuracy : 0.6249          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the test-set confusion matrix.
ad_tda_kde_5.60.5_n2_nn1_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.002662e-01   3.137412e-01   7.921975e-01   8.081554e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.888881e-22  1.004573e-221
# Keep the test-set accuracy. It is looked up by name instead of by position
# so the extraction does not silently pick another statistic if the layout of
# `$overall` ever changes.
ad_tda_kde_5.60.5_n2_nn1_cf0_ov_acc <- ad_tda_kde_5.60.5_n2_nn1_cf0$overall["Accuracy"]
# Per-class statistics (reported for the 'Positive' class of the matrix).
ad_tda_kde_5.60.5_n2_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9631877            0.2865646            0.8097721 
##       Neg Pred Value            Precision               Recall 
##            0.7117212            0.8097721            0.9631877 
##                   F1           Prevalence       Detection Rate 
##            0.8798423            0.7592138            0.7312654 
## Detection Prevalence    Balanced Accuracy 
##            0.9030508            0.6248762
# Keep precision, recall and F1 (elements 5 to 7 in the printout above),
# again selected by name rather than by index.
ad_tda_kde_5.60.5_n2_nn1_cf0_pre_rec_f1 <- ad_tda_kde_5.60.5_n2_nn1_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-TDA-assisted NN vs. TDA-assisted NN classifiers

### 3-fold diff

# Row-wise accuracy gap: ad_nn1_fit_re minus the n2 TDA-assisted resamples.
# A negative entry means the TDA-assisted fit scored higher on that row.
diff_tda_kde_5.60.5_nn1_n2_3_fold <- ad_nn1_fit_re - ad_tda_kde_5.60.5_n2_nn1_fit_re
diff_tda_kde_5.60.5_nn1_n2_3_fold
##      Accuracy
## 1 -0.01769400
## 2  0.02241235
## 3 -0.02017884
## Bayesian Tests 3-fold diff

# Bayesian sign test on the 3-fold accuracy differences.
# -0.01 and 0.01 are presumably the ROPE limits (the same interval is passed
# to rope() further down) -- confirm against the helper's definition.
bst_tda_kde_5.60.5_nn1.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nn1_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_nn1.n2_3_fold
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.25
# Odds of the "left" outcome against the "right" outcome (probLeft / probRight).
bst_tda_kde_5.60.5_nn1.n2_3_fold_odds.left <- with(
  bst_tda_kde_5.60.5_nn1.n2_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.60.5_nn1.n2_3_fold_odds.left
## [1] 2
# Bayesian signed-rank test on the same 3-fold differences, with the same
# -0.01 / 0.01 interval as the sign test.
bsr_tda_kde_5.60.5_nn1.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nn1_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_nn1.n2_3_fold
## $winLeft
## [1] 0.4894667
## 
## $winRope
## [1] 0.3753667
## 
## $winRight
## [1] 0.1351667
# Correlated Bayesian t-test on the 3-fold differences.
# NOTE(review): the second argument is 0.1. If it is the cross-validation
# correlation (rho = 1 / number of folds), 3-fold resamples would call for
# 1/3 rather than 0.1 -- confirm against the helper's definition.
bct_tda_kde_5.60.5_nn1.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nn1_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_nn1.n2_3_fold
## $left
## [1] 0.3948839
## 
## $rope
## [1] 0.3840987
## 
## $right
## [1] 0.2210175
# Plot the share of the 3-fold differences falling inside [-0.01, 0.01].
plot(
  rope(diff_tda_kde_5.60.5_nn1_n2_3_fold, c(-0.01, 0.01))
)

# Bayes factor t-test (left disabled)
#bf_tda_kde_5.60.5_nn1.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nn1_n2_3_fold))
#bf_tda_kde_5.60.5_nn1.n2_3_fold

# Frequentist one-sample t-test of the mean difference against 0.
t.test(as.matrix(diff_tda_kde_5.60.5_nn1_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.60.5_nn1_n2_3_fold)
## t = -0.3734, df = 2, p-value = 0.7447
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.06453689  0.05422989
## sample estimates:
##    mean of x 
## -0.005153499
### Test set diff
# Test-set accuracy gap: nn1_cf_ov_acc minus the n2 TDA-assisted accuracy.
# This is a single number, not one value per fold.
diff_tda_kde_5.60.5_nn1.n2_test <- nn1_cf_ov_acc - ad_tda_kde_5.60.5_n2_nn1_cf0_ov_acc
diff_tda_kde_5.60.5_nn1.n2_test
##   Accuracy 
## 0.00481163
## Bayesian Tests Test set diff

# Bayesian sign test on the single test-set difference, using the same
# -0.01 / 0.01 interval as the 3-fold comparison.
bst_tda_kde_5.60.5_nn1.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nn1.n2_test),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_nn1.n2_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds of "left" against "right" (probLeft / probRight). Both probabilities
# are 0 in the result above, so the ratio is 0/0 and evaluates to NaN.
bst_tda_kde_5.60.5_nn1.n2_test_odds.left <- with(
  bst_tda_kde_5.60.5_nn1.n2_test,
  probLeft / probRight
)
bst_tda_kde_5.60.5_nn1.n2_test_odds.left
## [1] NaN
# Bayesian signed-rank test on the single test-set difference.
bsr_tda_kde_5.60.5_nn1.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nn1.n2_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_nn1.n2_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Correlated Bayesian t-test on the single test-set difference. The result
# printed below is NA for every region -- presumably because one observation
# gives no variance estimate (TODO confirm against the helper).
bct_tda_kde_5.60.5_nn1.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nn1.n2_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_nn1.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
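# NOTE: the three analyses below are left disabled for the test-set comparison,
# presumably because the test-set difference is a single value.

# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_nn1.n2_test))

#BayesFactor
#bf_tda_kde_5.60.5_nn1.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nn1.n2_test))
#bf_tda_kde_5.60.5_nn1.n2_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_nn1.n2_test))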

##Node3

#Neural Network 1
# Fit the n3 TDA-assisted neural net. caret tunes an nnet model over nn1Grid
# with the resampling scheme in fitControl and selects the final model by
# accuracy. The response adult_df1 is converted to a factor inside the formula.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
# NOTE(review): `Importance` does not look like an nnet argument (it may have
# been carried over from a randomForest call) -- confirm whether it can be
# dropped.
# NOTE(review): the fitting log reports "stopped after 100 iterations" for
# nearly every fit, i.e. the optimiser hit its iteration limit before
# converging -- consider raising the limit.
Adult_TDA_KDE_5.60.5_n3_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n3.vec,
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 5173.766904 
## iter  10 value 5158.816215
## iter  20 value 4859.222698
## iter  30 value 4818.377281
## iter  40 value 4755.842411
## iter  50 value 4713.355156
## iter  60 value 4698.288805
## iter  70 value 4674.146151
## iter  80 value 4637.643065
## iter  90 value 4623.070153
## iter 100 value 4615.723013
## final  value 4615.723013 
## stopped after 100 iterations
## # weights:  331
## initial  value 6473.410458 
## iter  10 value 5081.157282
## iter  20 value 4846.996978
## iter  30 value 4835.899220
## iter  40 value 4820.906722
## iter  50 value 4735.442308
## iter  60 value 4440.604673
## iter  70 value 4019.504968
## iter  80 value 3699.328476
## iter  90 value 3340.701701
## iter 100 value 3132.626094
## final  value 3132.626094 
## stopped after 100 iterations
## # weights:  551
## initial  value 6017.875276 
## iter  10 value 5160.548788
## iter  20 value 5110.636949
## iter  30 value 4869.342808
## iter  40 value 4743.117242
## iter  50 value 4696.106404
## iter  60 value 4668.513683
## iter  70 value 4666.026188
## iter  80 value 4656.849161
## iter  90 value 4655.154700
## iter 100 value 4648.408940
## final  value 4648.408940 
## stopped after 100 iterations
## # weights:  771
## initial  value 5606.139581 
## iter  10 value 4850.698808
## iter  20 value 4819.807601
## iter  30 value 4815.602466
## iter  40 value 4814.749279
## iter  50 value 4814.372622
## iter  60 value 4812.555697
## iter  70 value 4788.451961
## iter  80 value 4724.402936
## iter  90 value 4716.811297
## iter 100 value 4709.406579
## final  value 4709.406579 
## stopped after 100 iterations
## # weights:  221
## initial  value 6525.535799 
## iter  10 value 5107.919528
## iter  20 value 4811.726587
## iter  30 value 4748.623192
## iter  40 value 4738.762459
## iter  50 value 4721.771950
## iter  60 value 4713.150235
## iter  70 value 4712.956961
## iter  80 value 4712.369356
## iter  90 value 4708.805408
## iter 100 value 4707.579416
## final  value 4707.579416 
## stopped after 100 iterations
## # weights:  331
## initial  value 5303.280547 
## iter  10 value 4864.669602
## iter  20 value 4831.076164
## iter  30 value 4827.866474
## iter  40 value 4813.222292
## iter  50 value 4742.786790
## iter  60 value 4727.052257
## iter  70 value 4715.456030
## iter  80 value 4664.752186
## iter  90 value 4650.043181
## iter 100 value 4647.433538
## final  value 4647.433538 
## stopped after 100 iterations
## # weights:  551
## initial  value 8174.301694 
## iter  10 value 5130.592507
## iter  20 value 4840.920013
## iter  30 value 4818.764225
## iter  40 value 4693.733923
## iter  50 value 4629.930675
## iter  60 value 4423.433974
## iter  70 value 3898.091451
## iter  80 value 3427.972892
## iter  90 value 3297.886764
## iter 100 value 3150.120483
## final  value 3150.120483 
## stopped after 100 iterations
## # weights:  771
## initial  value 8390.437739 
## iter  10 value 5109.960956
## iter  20 value 4813.301832
## iter  30 value 4665.327913
## iter  40 value 4650.584771
## iter  50 value 4571.520044
## iter  60 value 4205.611360
## iter  70 value 3624.966054
## iter  80 value 3462.497793
## iter  90 value 3442.302391
## iter 100 value 3259.590462
## final  value 3259.590462 
## stopped after 100 iterations
## # weights:  221
## initial  value 5346.783662 
## iter  10 value 5129.428945
## iter  20 value 4922.853497
## iter  30 value 4808.906661
## iter  40 value 4706.918042
## iter  50 value 4631.363376
## iter  60 value 4490.585549
## iter  70 value 4317.787384
## iter  80 value 4098.698692
## iter  90 value 3675.588540
## iter 100 value 3461.825948
## final  value 3461.825948 
## stopped after 100 iterations
## # weights:  331
## initial  value 10194.797571 
## iter  10 value 5119.705490
## iter  20 value 5115.485470
## iter  30 value 4835.996230
## iter  40 value 4817.261612
## iter  50 value 4816.276311
## iter  60 value 4810.130933
## iter  70 value 4755.048082
## iter  80 value 4732.863782
## iter  90 value 4259.012968
## iter 100 value 3895.216061
## final  value 3895.216061 
## stopped after 100 iterations
## # weights:  551
## initial  value 5343.417062 
## iter  10 value 5160.728366
## iter  20 value 5160.603501
## iter  30 value 4842.008824
## iter  40 value 4829.558196
## iter  50 value 4794.498955
## iter  60 value 4759.063462
## iter  70 value 4620.066940
## iter  80 value 4481.737409
## iter  90 value 4377.218047
## iter 100 value 3614.297230
## final  value 3614.297230 
## stopped after 100 iterations
## # weights:  771
## initial  value 8211.602652 
## iter  10 value 5066.000314
## iter  20 value 4864.590160
## iter  30 value 4776.572503
## iter  40 value 4603.789840
## iter  50 value 3866.000062
## iter  60 value 3781.735694
## iter  70 value 3740.361191
## iter  80 value 3486.621202
## iter  90 value 3362.647843
## iter 100 value 3331.346553
## final  value 3331.346553 
## stopped after 100 iterations
## # weights:  221
## initial  value 6069.508516 
## iter  10 value 5103.854467
## iter  20 value 4800.133279
## iter  30 value 4795.197616
## iter  40 value 4773.635268
## iter  50 value 4716.299034
## iter  60 value 4647.857081
## iter  70 value 4392.526438
## iter  80 value 4055.565440
## iter  90 value 3430.894220
## iter 100 value 3332.590956
## final  value 3332.590956 
## stopped after 100 iterations
## # weights:  331
## initial  value 5389.296505 
## iter  10 value 5160.129962
## iter  20 value 4840.753408
## iter  30 value 4800.483957
## iter  40 value 4700.853170
## iter  50 value 4680.255132
## iter  60 value 4671.433701
## iter  70 value 4653.474790
## iter  80 value 4639.389487
## iter  90 value 4622.226738
## iter 100 value 4595.027535
## final  value 4595.027535 
## stopped after 100 iterations
## # weights:  551
## initial  value 5217.939419 
## iter  10 value 5160.557336
## iter  20 value 5110.426602
## iter  30 value 4825.214856
## iter  40 value 4797.591742
## iter  50 value 4739.774930
## iter  60 value 4640.124585
## iter  70 value 4541.871376
## iter  80 value 4170.998239
## iter  90 value 3591.545812
## iter 100 value 3237.889131
## final  value 3237.889131 
## stopped after 100 iterations
## # weights:  771
## initial  value 5305.678551 
## iter  10 value 5133.265681
## iter  20 value 5081.869232
## iter  30 value 4938.044090
## iter  40 value 4740.202423
## iter  50 value 3964.483394
## iter  60 value 3631.225591
## iter  70 value 3499.379503
## iter  80 value 3221.991772
## iter  90 value 3125.198982
## iter 100 value 3115.078415
## final  value 3115.078415 
## stopped after 100 iterations
## # weights:  221
## initial  value 5460.833002 
## iter  10 value 5106.629361
## iter  20 value 4779.260753
## iter  30 value 4774.494329
## iter  40 value 4762.975597
## iter  50 value 4751.408937
## iter  60 value 4737.934387
## iter  70 value 4720.876773
## iter  80 value 4706.735032
## iter  90 value 4647.366537
## iter 100 value 4572.360635
## final  value 4572.360635 
## stopped after 100 iterations
## # weights:  331
## initial  value 5862.998534 
## iter  10 value 4876.519444
## iter  20 value 4785.198638
## iter  30 value 4747.968480
## iter  40 value 4513.503940
## iter  50 value 4261.186308
## iter  60 value 3624.435952
## iter  70 value 3311.345206
## iter  80 value 3183.757727
## iter  90 value 3090.652376
## iter 100 value 3031.725465
## final  value 3031.725465 
## stopped after 100 iterations
## # weights:  551
## initial  value 5609.985654 
## iter  10 value 5081.825345
## iter  20 value 4804.065631
## iter  30 value 4734.267018
## iter  40 value 4720.585339
## iter  50 value 4699.238611
## iter  60 value 4574.624416
## iter  70 value 3970.248271
## iter  80 value 3424.890035
## iter  90 value 3253.875625
## iter 100 value 3140.496370
## final  value 3140.496370 
## stopped after 100 iterations
## # weights:  771
## initial  value 5313.639793 
## iter  10 value 5163.112279
## iter  20 value 5160.601242
## iter  30 value 5160.572249
## iter  40 value 5027.889007
## iter  50 value 4835.199193
## iter  60 value 4818.608820
## iter  70 value 4800.718813
## iter  80 value 4725.594982
## iter  90 value 4465.313560
## iter 100 value 4086.070467
## final  value 4086.070467 
## stopped after 100 iterations
## # weights:  221
## initial  value 6611.175021 
## iter  10 value 5162.183725
## iter  20 value 4823.917093
## iter  30 value 4823.236463
## iter  40 value 4799.723621
## iter  50 value 4791.555781
## iter  60 value 4783.909180
## iter  70 value 4770.033642
## iter  80 value 4747.289167
## iter  90 value 4728.203259
## iter 100 value 4718.829418
## final  value 4718.829418 
## stopped after 100 iterations
## # weights:  331
## initial  value 5316.454484 
## iter  10 value 4990.990209
## iter  20 value 4921.804940
## iter  30 value 4884.544599
## iter  40 value 4778.772267
## iter  50 value 4729.438475
## iter  60 value 4705.790516
## iter  70 value 4691.339845
## iter  80 value 4624.034337
## iter  90 value 4606.822283
## iter 100 value 4589.307660
## final  value 4589.307660 
## stopped after 100 iterations
## # weights:  551
## initial  value 6719.038565 
## iter  10 value 5156.786373
## iter  20 value 5031.272551
## iter  30 value 4731.992808
## iter  40 value 4704.381066
## iter  50 value 4679.064253
## iter  60 value 4604.304619
## iter  70 value 4519.123838
## iter  80 value 4361.836924
## iter  90 value 4338.105353
## iter 100 value 4038.390421
## final  value 4038.390421 
## stopped after 100 iterations
## # weights:  771
## initial  value 8126.441918 
## iter  10 value 5006.578895
## iter  20 value 4816.440157
## iter  30 value 4756.577511
## iter  40 value 4716.501376
## iter  50 value 4702.462131
## iter  60 value 4698.065875
## iter  70 value 4693.123402
## iter  80 value 4692.076827
## iter  90 value 4670.177575
## iter 100 value 4635.668197
## final  value 4635.668197 
## stopped after 100 iterations
## # weights:  221
## initial  value 9610.521905 
## iter  10 value 5130.617025
## iter  20 value 4768.834286
## iter  30 value 4768.658673
## iter  30 value 4768.658642
## iter  40 value 4708.963354
## iter  50 value 4678.762612
## iter  60 value 4672.183220
## iter  70 value 4668.924015
## iter  80 value 4668.805343
## iter  90 value 4656.489511
## iter 100 value 4643.771305
## final  value 4643.771305 
## stopped after 100 iterations
## # weights:  331
## initial  value 7004.895894 
## iter  10 value 4832.664372
## iter  20 value 4752.800882
## iter  30 value 4687.890904
## iter  40 value 4656.793432
## iter  50 value 4616.967166
## iter  60 value 4494.977650
## iter  70 value 3827.926500
## iter  80 value 3655.235798
## iter  90 value 3637.388920
## iter 100 value 3632.590039
## final  value 3632.590039 
## stopped after 100 iterations
## # weights:  551
## initial  value 6996.454423 
## iter  10 value 5073.795339
## iter  20 value 4763.964230
## iter  30 value 4631.110190
## iter  40 value 4615.561688
## iter  50 value 4588.670505
## iter  60 value 4570.612553
## iter  70 value 4496.418226
## iter  80 value 4326.353217
## iter  90 value 4144.571180
## iter 100 value 3951.657097
## final  value 3951.657097 
## stopped after 100 iterations
## # weights:  771
## initial  value 6088.615002 
## iter  10 value 4987.773619
## iter  20 value 4787.218937
## iter  30 value 4140.693195
## iter  40 value 3855.462005
## iter  50 value 3518.562774
## iter  60 value 3208.522152
## iter  70 value 3074.905171
## iter  80 value 2993.458940
## iter  90 value 2947.431500
## iter 100 value 2925.228368
## final  value 2925.228368 
## stopped after 100 iterations
## # weights:  221
## initial  value 5405.147434 
## iter  10 value 5131.962214
## iter  20 value 4876.019258
## iter  30 value 4741.806599
## iter  40 value 4677.683499
## iter  50 value 4669.867773
## iter  60 value 4665.654576
## iter  70 value 4655.163411
## iter  80 value 4585.818421
## iter  90 value 4567.285966
## iter 100 value 4557.267021
## final  value 4557.267021 
## stopped after 100 iterations
## # weights:  331
## initial  value 8491.471015 
## iter  10 value 4962.411652
## iter  20 value 4770.790608
## iter  30 value 4764.188464
## iter  40 value 4760.904696
## iter  50 value 4733.697886
## iter  60 value 4685.381204
## iter  70 value 4499.232181
## iter  80 value 4441.459263
## iter  90 value 4437.750314
## iter 100 value 4432.863298
## final  value 4432.863298 
## stopped after 100 iterations
## # weights:  551
## initial  value 10081.256486 
## iter  10 value 4833.145093
## iter  20 value 4794.508197
## iter  30 value 4781.055085
## iter  40 value 4771.838805
## iter  50 value 4731.610235
## iter  60 value 4691.035675
## iter  70 value 4659.613111
## iter  80 value 4652.504580
## iter  90 value 4645.409961
## iter 100 value 4623.792382
## final  value 4623.792382 
## stopped after 100 iterations
## # weights:  771
## initial  value 5416.411293 
## iter  10 value 4934.159339
## iter  20 value 4858.604745
## iter  30 value 4753.756822
## iter  40 value 4730.596020
## iter  50 value 4722.447397
## iter  60 value 4714.466157
## iter  70 value 4679.937929
## iter  80 value 4652.322341
## iter  90 value 4646.412438
## iter 100 value 4572.067554
## final  value 4572.067554 
## stopped after 100 iterations
## # weights:  221
## initial  value 9074.084029 
## iter  10 value 5105.426107
## iter  20 value 5104.671561
## iter  30 value 5104.609901
## final  value 5104.609630 
## converged
## # weights:  331
## initial  value 5920.996942 
## iter  10 value 5165.770914
## iter  20 value 5160.863691
## iter  30 value 5160.807081
## iter  40 value 4875.015002
## iter  50 value 4861.057157
## iter  60 value 4837.850373
## iter  70 value 4731.210855
## iter  80 value 4677.285698
## iter  90 value 4476.576051
## iter 100 value 4281.581250
## final  value 4281.581250 
## stopped after 100 iterations
## # weights:  551
## initial  value 5297.434190 
## iter  10 value 5135.952721
## iter  20 value 4756.848384
## iter  30 value 4725.756364
## iter  40 value 4685.750972
## iter  50 value 4683.707007
## iter  60 value 4649.891185
## iter  70 value 4635.739719
## iter  80 value 4502.762235
## iter  90 value 4254.356674
## iter 100 value 3698.730849
## final  value 3698.730849 
## stopped after 100 iterations
## # weights:  771
## initial  value 5377.433299 
## iter  10 value 4790.247837
## iter  20 value 4680.294858
## iter  30 value 4653.277620
## iter  40 value 4621.873948
## iter  50 value 4555.805352
## iter  60 value 3988.863883
## iter  70 value 3626.102759
## iter  80 value 3258.933049
## iter  90 value 3109.087623
## iter 100 value 3022.364999
## final  value 3022.364999 
## stopped after 100 iterations
## # weights:  771
## initial  value 11384.274297 
## iter  10 value 7675.831343
## iter  20 value 7249.699722
## iter  30 value 7212.071632
## iter  40 value 7026.256588
## iter  50 value 6998.345823
## iter  60 value 6963.350936
## iter  70 value 6955.592972
## iter  80 value 6951.480647
## iter  90 value 6942.576323
## iter 100 value 6936.759958
## final  value 6936.759958 
## stopped after 100 iterations
# Print the caret tuning summary: resampled accuracy and kappa for every
# size/decay combination, plus the combination that was finally selected.
Adult_TDA_KDE_5.60.5_n3_NN1Fit0
## Neural Network 
## 
## 13266 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8844, 8844, 8844 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa    
##   2     0.3    0.7881049  0.3750234
##   2     0.5    0.7777024  0.2872715
##   2     0.7    0.7822252  0.2715509
##   3     0.3    0.8089100  0.4550476
##   3     0.5    0.7921755  0.3931054
##   3     0.7    0.7727273  0.2334769
##   5     0.3    0.7850143  0.3307373
##   5     0.5    0.8086839  0.4462745
##   5     0.7    0.7942861  0.3960732
##   7     0.3    0.8126036  0.4821764
##   7     0.5    0.7861450  0.3436784
##   7     0.7    0.8107191  0.4633917
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 7 and decay = 0.3.
# Accuracy and kappa of the selected model on each cross-validation fold.
Adult_TDA_KDE_5.60.5_n3_NN1Fit0$resample
##    Accuracy     Kappa Resample
## 1 0.8290366 0.5712893    Fold2
## 2 0.7761194 0.3017683    Fold1
## 3 0.8326549 0.5734715    Fold3
# Keep the per-fold accuracy as a one-column data frame. The column is picked
# by name instead of by position so a change in column order cannot silently
# select Kappa or the fold label.
# NOTE(review): rows stay in the order printed above (Fold2, Fold1, Fold3),
# not sorted by fold -- confirm the fit this is compared against lists its
# resamples in the same order before differencing row by row.
ad_tda_kde_5.60.5_n3_nn1_fit_re <- Adult_TDA_KDE_5.60.5_n3_NN1Fit0$resample["Accuracy"]

# Print the final network's architecture and fitted weights.
summary(Adult_TDA_KDE_5.60.5_n3_NN1Fit0)
## a 108-7-1 network with 771 weights
## options were - entropy fitting  decay=0.3
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.00    -0.04     0.00     0.00     0.00     0.00    -0.01     0.01 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.01     0.00     0.00     0.00     0.02     0.00     0.00     0.00 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.00     0.00     0.01     0.00     0.00     0.00     0.00     0.00 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.00     0.00     0.00    -0.01     0.00     0.00     0.00     0.00 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##    -0.18     0.00     0.05     0.00     0.00     0.00     0.00     0.00 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##    -0.29    -0.10     0.00     0.09     0.16     0.00    -0.62     0.09 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##     0.12    -0.11    -0.02     0.00     0.00     0.00     0.00     0.00 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.00     0.00     0.00     0.02    -0.18     0.43     0.00    -0.32 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##    -0.15     0.00     0.00    -0.08    -2.02    -0.65     0.00     0.64 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##    -0.01    -0.39     0.12     0.00     0.00    -0.11     0.00    -0.55 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##     0.37     0.00    -0.15     0.04     0.02     0.00    -0.36     0.00 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     0.43     0.01     0.01     0.82    -0.80     0.00     0.00    -0.13 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##    -0.17     0.00     0.00    -0.14     0.00    -0.15    -0.34     0.05 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.00     0.08    -0.17     0.12     0.00     0.14     0.00     0.00 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00    -0.12     0.00 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.00     0.00    -0.42     0.00     0.00 
##    b->h3   i1->h3   i2->h3   i3->h3   i4->h3   i5->h3   i6->h3   i7->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h3   i9->h3  i10->h3  i11->h3  i12->h3  i13->h3  i14->h3  i15->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h3  i17->h3  i18->h3  i19->h3  i20->h3  i21->h3  i22->h3  i23->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h3  i25->h3  i26->h3  i27->h3  i28->h3  i29->h3  i30->h3  i31->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h3  i33->h3  i34->h3  i35->h3  i36->h3  i37->h3  i38->h3  i39->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h3  i41->h3  i42->h3  i43->h3  i44->h3  i45->h3  i46->h3  i47->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h3  i49->h3  i50->h3  i51->h3  i52->h3  i53->h3  i54->h3  i55->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h3  i57->h3  i58->h3  i59->h3  i60->h3  i61->h3  i62->h3  i63->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h3  i65->h3  i66->h3  i67->h3  i68->h3  i69->h3  i70->h3  i71->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h3  i73->h3  i74->h3  i75->h3  i76->h3  i77->h3  i78->h3  i79->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h3  i81->h3  i82->h3  i83->h3  i84->h3  i85->h3  i86->h3  i87->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h3  i89->h3  i90->h3  i91->h3  i92->h3  i93->h3  i94->h3  i95->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h3  i97->h3  i98->h3  i99->h3 i100->h3 i101->h3 i102->h3 i103->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h4   i1->h4   i2->h4   i3->h4   i4->h4   i5->h4   i6->h4   i7->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h4   i9->h4  i10->h4  i11->h4  i12->h4  i13->h4  i14->h4  i15->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h4  i17->h4  i18->h4  i19->h4  i20->h4  i21->h4  i22->h4  i23->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h4  i25->h4  i26->h4  i27->h4  i28->h4  i29->h4  i30->h4  i31->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h4  i33->h4  i34->h4  i35->h4  i36->h4  i37->h4  i38->h4  i39->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h4  i41->h4  i42->h4  i43->h4  i44->h4  i45->h4  i46->h4  i47->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h4  i49->h4  i50->h4  i51->h4  i52->h4  i53->h4  i54->h4  i55->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h4  i57->h4  i58->h4  i59->h4  i60->h4  i61->h4  i62->h4  i63->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h4  i65->h4  i66->h4  i67->h4  i68->h4  i69->h4  i70->h4  i71->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h4  i73->h4  i74->h4  i75->h4  i76->h4  i77->h4  i78->h4  i79->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h4  i81->h4  i82->h4  i83->h4  i84->h4  i85->h4  i86->h4  i87->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h4  i89->h4  i90->h4  i91->h4  i92->h4  i93->h4  i94->h4  i95->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h4  i97->h4  i98->h4  i99->h4 i100->h4 i101->h4 i102->h4 i103->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h5   i1->h5   i2->h5   i3->h5   i4->h5   i5->h5   i6->h5   i7->h5 
##     0.00     0.10     0.00     0.00     0.00     0.00     0.01    -0.01 
##   i8->h5   i9->h5  i10->h5  i11->h5  i12->h5  i13->h5  i14->h5  i15->h5 
##     0.00     0.00     0.00    -0.03     0.00     0.00     0.00     0.00 
##  i16->h5  i17->h5  i18->h5  i19->h5  i20->h5  i21->h5  i22->h5  i23->h5 
##     0.00     0.00     0.00     0.00     0.00    -0.01     0.00     0.00 
##  i24->h5  i25->h5  i26->h5  i27->h5  i28->h5  i29->h5  i30->h5  i31->h5 
##     0.00     0.00     0.00     0.00    -0.02     0.00     0.00     0.01 
##  i32->h5  i33->h5  i34->h5  i35->h5  i36->h5  i37->h5  i38->h5  i39->h5 
##     0.00    -0.01     0.00     0.00     0.00    -0.01     0.00     0.01 
##  i40->h5  i41->h5  i42->h5  i43->h5  i44->h5  i45->h5  i46->h5  i47->h5 
##    -0.01     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h5  i49->h5  i50->h5  i51->h5  i52->h5  i53->h5  i54->h5  i55->h5 
##     0.00     0.00     0.00     0.01    -0.01     0.00     0.00     0.00 
##  i56->h5  i57->h5  i58->h5  i59->h5  i60->h5  i61->h5  i62->h5  i63->h5 
##     0.00     0.00     0.00    -0.01     0.00     0.01     0.00     0.00 
##  i64->h5  i65->h5  i66->h5  i67->h5  i68->h5  i69->h5  i70->h5  i71->h5 
##     0.75     0.02    -0.13     0.00     0.00     0.00     0.00     0.00 
##  i72->h5  i73->h5  i74->h5  i75->h5  i76->h5  i77->h5  i78->h5  i79->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h5  i81->h5  i82->h5  i83->h5  i84->h5  i85->h5  i86->h5  i87->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h5  i89->h5  i90->h5  i91->h5  i92->h5  i93->h5  i94->h5  i95->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h5  i97->h5  i98->h5  i99->h5 i100->h5 i101->h5 i102->h5 i103->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h6   i1->h6   i2->h6   i3->h6   i4->h6   i5->h6   i6->h6   i7->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h6   i9->h6  i10->h6  i11->h6  i12->h6  i13->h6  i14->h6  i15->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h6  i17->h6  i18->h6  i19->h6  i20->h6  i21->h6  i22->h6  i23->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h6  i25->h6  i26->h6  i27->h6  i28->h6  i29->h6  i30->h6  i31->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h6  i33->h6  i34->h6  i35->h6  i36->h6  i37->h6  i38->h6  i39->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h6  i41->h6  i42->h6  i43->h6  i44->h6  i45->h6  i46->h6  i47->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h6  i49->h6  i50->h6  i51->h6  i52->h6  i53->h6  i54->h6  i55->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h6  i57->h6  i58->h6  i59->h6  i60->h6  i61->h6  i62->h6  i63->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h6  i65->h6  i66->h6  i67->h6  i68->h6  i69->h6  i70->h6  i71->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h6  i73->h6  i74->h6  i75->h6  i76->h6  i77->h6  i78->h6  i79->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h6  i81->h6  i82->h6  i83->h6  i84->h6  i85->h6  i86->h6  i87->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h6  i89->h6  i90->h6  i91->h6  i92->h6  i93->h6  i94->h6  i95->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h6  i97->h6  i98->h6  i99->h6 i100->h6 i101->h6 i102->h6 i103->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h6 i105->h6 i106->h6 i107->h6 i108->h6 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h7   i1->h7   i2->h7   i3->h7   i4->h7   i5->h7   i6->h7   i7->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h7   i9->h7  i10->h7  i11->h7  i12->h7  i13->h7  i14->h7  i15->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h7  i17->h7  i18->h7  i19->h7  i20->h7  i21->h7  i22->h7  i23->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h7  i25->h7  i26->h7  i27->h7  i28->h7  i29->h7  i30->h7  i31->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h7  i33->h7  i34->h7  i35->h7  i36->h7  i37->h7  i38->h7  i39->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h7  i41->h7  i42->h7  i43->h7  i44->h7  i45->h7  i46->h7  i47->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h7  i49->h7  i50->h7  i51->h7  i52->h7  i53->h7  i54->h7  i55->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h7  i57->h7  i58->h7  i59->h7  i60->h7  i61->h7  i62->h7  i63->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h7  i65->h7  i66->h7  i67->h7  i68->h7  i69->h7  i70->h7  i71->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h7  i73->h7  i74->h7  i75->h7  i76->h7  i77->h7  i78->h7  i79->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h7  i81->h7  i82->h7  i83->h7  i84->h7  i85->h7  i86->h7  i87->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h7  i89->h7  i90->h7  i91->h7  i92->h7  i93->h7  i94->h7  i95->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h7  i97->h7  i98->h7  i99->h7 i100->h7 i101->h7 i102->h7 i103->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h7 i105->h7 i106->h7 i107->h7 i108->h7 
##     0.00     0.00     0.00     0.00     0.00 
##  b->o h1->o h2->o h3->o h4->o h5->o h6->o h7->o 
## -0.10 -1.01  1.68  0.02 -0.07  1.07  0.00 -0.11
# Variable-importance plot for the n3 TDA-assisted network fit, showing up to
# 50 features. The feature count is passed by name so it cannot be mistaken
# for another positional argument, and the title typo ("Assited") is fixed.
vip(Adult_TDA_KDE_5.60.5_n3_NN1Fit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.60.5_n3_NN1Fit TDA-Assisted NN")

# Score the held-out test rows with the n3 TDA-assisted network
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n3_NN1Fit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate predicted labels against the observed adult_df1 labels
ad_tda_kde_5.60.5_n3_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n3_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7147  1675
##      >50K     269   677
##                                           
##                Accuracy : 0.801           
##                  95% CI : (0.7929, 0.8089)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.3161          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9637          
##             Specificity : 0.2878          
##          Pos Pred Value : 0.8101          
##          Neg Pred Value : 0.7156          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7317          
##    Detection Prevalence : 0.9032          
##       Balanced Accuracy : 0.6258          
##                                           
##        'Positive' Class :  <=50K          
## 
# Show the same confusion matrix again (repeat of the print just above)
print(ad_tda_kde_5.60.5_n3_nn1_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7147  1675
##      >50K     269   677
##                                           
##                Accuracy : 0.801           
##                  95% CI : (0.7929, 0.8089)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.3161          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9637          
##             Specificity : 0.2878          
##          Pos Pred Value : 0.8101          
##          Neg Pred Value : 0.7156          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7317          
##    Detection Prevalence : 0.9032          
##       Balanced Accuracy : 0.6258          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics (accuracy, kappa, accuracy bounds, p-values)
ad_tda_kde_5.60.5_n3_nn1_cf0[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.009828e-01   3.160787e-01   7.929246e-01   8.088611e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   3.424777e-23  7.890674e-223
# Keep the first overall statistic (Accuracy) for the later test-set comparison
ad_tda_kde_5.60.5_n3_nn1_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n3_nn1_cf0[["overall"]][1]
# Per-class statistics for the positive class
ad_tda_kde_5.60.5_n3_nn1_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9637271            0.2878401            0.8101338 
##       Neg Pred Value            Precision               Recall 
##            0.7156448            0.8101338            0.9637271 
##                   F1           Prevalence       Detection Rate 
##            0.8802808            0.7592138            0.7316749 
## Detection Prevalence    Balanced Accuracy 
##            0.9031532            0.6257836
# Entries 5 to 7 of byClass are Precision, Recall and F1 in the printout above
ad_tda_kde_5.60.5_n3_nn1_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n3_nn1_cf0[["byClass"]][5:7]

###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers

### 3-fold diff

# Per-fold accuracy gap: ad_nn1_fit_re minus the n3 TDA-assisted resample accuracies
diff_tda_kde_5.60.5_nn1_n3_3_fold <-
  ad_nn1_fit_re - ad_tda_kde_5.60.5_n3_nn1_fit_re
print(diff_tda_kde_5.60.5_nn1_n3_3_fold)
##      Accuracy
## 1 -0.02108613
## 2  0.07397273
## 3 -0.03125980
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

# Sign test on the fold-wise differences, called with the bounds -0.01 and 0.01
bst_tda_kde_5.60.5_nn1.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nn1_n3_3_fold),
  -0.01,
  0.01
)
print(bst_tda_kde_5.60.5_nn1.n3_3_fold)
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.25
# Odds Left Bayesian Sign Test 

# Ratio of the sign test's probLeft to its probRight
bst_tda_kde_5.60.5_nn1.n3_3_fold_odds.left <- with(
  bst_tda_kde_5.60.5_nn1.n3_3_fold,
  probLeft / probRight
)
print(bst_tda_kde_5.60.5_nn1.n3_3_fold_odds.left)
## [1] 2
# Bayesian Signed Rank Test

# Signed-rank test on the same fold-wise differences and the same bounds
bsr_tda_kde_5.60.5_nn1.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nn1_n3_3_fold),
  -0.01,
  0.01
)
print(bsr_tda_kde_5.60.5_nn1.n3_3_fold)
## $winLeft
## [1] 0.5385667
## 
## $winRope
## [1] 0.01733333
## 
## $winRight
## [1] 0.4441
# Bayesian Correlated Test

# Correlated t-test on the fold-wise differences; the second argument (0.1)
# is presumably the correlation setting -- TODO confirm against the helper
bct_tda_kde_5.60.5_nn1.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nn1_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_kde_5.60.5_nn1.n3_3_fold)
## $left
## [1] 0.3500051
## 
## $rope
## [1] 0.1754636
## 
## $right
## [1] 0.4745313
# Rope Plot
# Plot the fold-wise differences against the [-0.01, 0.01] ROPE
plot(
  rope(
    diff_tda_kde_5.60.5_nn1_n3_3_fold,
    range = c(-0.01, 0.01)
  )
)

#BayesFactor
#bf_tda_kde_5.60.5_nn1.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nn1_n3_3_fold))
#bf_tda_kde_5.60.5_nn1.n3_3_fold

#t_test
# One-sample t-test of the fold-wise differences (default null: mean of 0)
print(t.test(as.matrix(diff_tda_kde_5.60.5_nn1_n3_3_fold)))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.60.5_nn1_n3_3_fold)
## t = 0.21512, df = 2, p-value = 0.8496
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1369766  0.1513944
## sample estimates:
##   mean of x 
## 0.007208932
### Test set diff
# Test-set accuracy gap: nn1_cf_ov_acc minus the n3 TDA-assisted accuracy
diff_tda_kde_5.60.5_nn1.n3_test <-
  nn1_cf_ov_acc - ad_tda_kde_5.60.5_n3_nn1_cf0_ov_acc
print(diff_tda_kde_5.60.5_nn1.n3_test)
##    Accuracy 
## 0.004095004
## Bayesian Tests Test set diff

# Bayesian Sign Test

# Sign test on the single test-set difference, with bounds -0.01 and 0.01
bst_tda_kde_5.60.5_nn1.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nn1.n3_test),
  -0.01,
  0.01
)
print(bst_tda_kde_5.60.5_nn1.n3_test)
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

# Ratio of probLeft to probRight. Both are 0 in the result printed above,
# so this evaluates to 0/0 = NaN.
bst_tda_kde_5.60.5_nn1.n3_test_odds.left <-
  bst_tda_kde_5.60.5_nn1.n3_test[["probLeft"]] /
  bst_tda_kde_5.60.5_nn1.n3_test[["probRight"]]
print(bst_tda_kde_5.60.5_nn1.n3_test_odds.left)
## [1] NaN
# Bayesian Signed Rank Test

# Signed-rank test on the single test-set difference, same bounds
bsr_tda_kde_5.60.5_nn1.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nn1.n3_test),
  -0.01,
  0.01
)
print(bsr_tda_kde_5.60.5_nn1.n3_test)
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

# Correlated t-test on the single test-set difference. The printed result is
# NA for all three regions, presumably because one observation gives no
# variance estimate -- TODO confirm against the helper's implementation.
bct_tda_kde_5.60.5_nn1.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nn1.n3_test),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_kde_5.60.5_nn1.n3_test)
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_nn1.n3_test))

#BayesFactor
#bf_tda_kde_5.60.5_nn1.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nn1.n3_test)) #bf_tda_pca_5.60.5_nn1.n3_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_nn1.n3_test))

##Node4

#Neural Network 1
# Fit the n4 TDA-assisted neural network with train(), tuning over nn1Grid
# under the resampling scheme in fitControl and selecting on Accuracy.
Adult_TDA_KDE_5.60.5_n4_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n4.vec,
  # TRUE instead of T: T is an ordinary variable that can be reassigned.
  # NOTE(review): `Importance` looks like it is forwarded through `...` to
  # the underlying fitting routine -- confirm that nnet actually uses it.
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 4743.562112 
## iter  10 value 4082.003154
## iter  20 value 3844.886381
## iter  30 value 3844.320194
## iter  40 value 3844.037795
## iter  50 value 3783.250142
## iter  60 value 3779.777945
## iter  70 value 3764.399000
## iter  80 value 3732.849412
## iter  90 value 3726.031070
## iter 100 value 3705.929119
## final  value 3705.929119 
## stopped after 100 iterations
## # weights:  331
## initial  value 4881.123440 
## iter  10 value 3813.282267
## iter  20 value 3774.904418
## iter  30 value 3773.126152
## iter  40 value 3736.299792
## iter  50 value 3718.934374
## iter  60 value 3711.839847
## iter  70 value 3456.321896
## iter  80 value 3259.873230
## iter  90 value 3031.649835
## iter 100 value 2745.486028
## final  value 2745.486028 
## stopped after 100 iterations
## # weights:  551
## initial  value 5891.026768 
## iter  10 value 4059.191814
## iter  20 value 3789.088050
## iter  30 value 3669.745964
## iter  40 value 3270.470855
## iter  50 value 2870.061515
## iter  60 value 2768.173411
## iter  70 value 2492.653057
## iter  80 value 2326.572339
## iter  90 value 2310.893797
## iter 100 value 2299.913232
## final  value 2299.913232 
## stopped after 100 iterations
## # weights:  771
## initial  value 4580.340904 
## iter  10 value 4081.992309
## final  value 4081.990352 
## converged
## # weights:  221
## initial  value 6274.289002 
## iter  10 value 4082.543061
## iter  20 value 4082.211973
## final  value 4082.171039 
## converged
## # weights:  331
## initial  value 10062.451062 
## iter  10 value 3823.301649
## iter  20 value 3722.726526
## iter  30 value 3709.137477
## iter  40 value 3647.608720
## iter  50 value 3460.656553
## iter  60 value 2953.431424
## iter  70 value 2837.823765
## iter  80 value 2623.790240
## iter  90 value 2507.191775
## iter 100 value 2413.588717
## final  value 2413.588717 
## stopped after 100 iterations
## # weights:  551
## initial  value 7277.957005 
## iter  10 value 3891.641007
## iter  20 value 3786.477019
## iter  30 value 3763.987313
## final  value 3763.109701 
## converged
## # weights:  771
## initial  value 9755.597945 
## iter  10 value 4023.761348
## iter  20 value 3794.467576
## iter  30 value 3692.551294
## iter  40 value 3557.161583
## iter  50 value 3026.430714
## iter  60 value 2912.210085
## iter  70 value 2867.342680
## iter  80 value 2745.729544
## iter  90 value 2695.306956
## iter 100 value 2690.323351
## final  value 2690.323351 
## stopped after 100 iterations
## # weights:  221
## initial  value 4612.842813 
## iter  10 value 4083.028369
## iter  20 value 4082.487665
## iter  30 value 4082.481431
## iter  30 value 4082.481405
## iter  30 value 4082.481404
## final  value 4082.481404 
## converged
## # weights:  331
## initial  value 7186.646190 
## iter  10 value 4082.768691
## iter  20 value 4082.586593
## iter  30 value 4069.307946
## iter  40 value 4004.307698
## iter  50 value 3866.971017
## iter  60 value 3788.981353
## iter  70 value 3785.178299
## iter  80 value 3779.044680
## iter  90 value 3771.611214
## iter 100 value 3730.803600
## final  value 3730.803600 
## stopped after 100 iterations
## # weights:  551
## initial  value 6627.454950 
## iter  10 value 4077.659143
## iter  20 value 3760.517297
## iter  30 value 3731.633755
## iter  40 value 3729.421207
## iter  50 value 3722.272148
## iter  60 value 3691.391110
## iter  70 value 3660.484868
## iter  80 value 3587.679656
## iter  90 value 3358.442267
## iter 100 value 2541.320297
## final  value 2541.320297 
## stopped after 100 iterations
## # weights:  771
## initial  value 5052.436394 
## iter  10 value 3869.525176
## iter  20 value 3855.668289
## iter  30 value 3751.854962
## iter  40 value 3736.770012
## iter  50 value 3731.312462
## iter  60 value 3728.155674
## iter  70 value 3699.681084
## iter  80 value 3688.224898
## iter  90 value 3637.673426
## iter 100 value 3579.925828
## final  value 3579.925828 
## stopped after 100 iterations
## # weights:  221
## initial  value 5740.283044 
## iter  10 value 4079.855858
## iter  20 value 3905.420754
## iter  30 value 3734.218084
## iter  40 value 3245.809205
## iter  50 value 2978.356853
## iter  60 value 2742.158672
## iter  70 value 2719.922371
## iter  80 value 2587.955475
## iter  90 value 2436.835439
## iter 100 value 2389.334182
## final  value 2389.334182 
## stopped after 100 iterations
## # weights:  331
## initial  value 4453.427373 
## iter  10 value 4082.017777
## iter  20 value 4081.903352
## final  value 4081.902076 
## converged
## # weights:  551
## initial  value 9364.244009 
## iter  10 value 4090.885490
## iter  20 value 4056.917891
## iter  30 value 3831.360228
## iter  40 value 3784.967191
## iter  50 value 3780.303587
## iter  60 value 3742.201003
## iter  70 value 3697.443780
## iter  80 value 3623.819339
## iter  90 value 3465.637717
## iter 100 value 2909.213649
## final  value 2909.213649 
## stopped after 100 iterations
## # weights:  771
## initial  value 10054.734148 
## iter  10 value 3982.413515
## iter  20 value 3895.971694
## iter  30 value 3775.974744
## iter  40 value 3749.857067
## iter  50 value 3740.956055
## iter  60 value 3734.485929
## iter  70 value 3731.233527
## iter  80 value 3729.940651
## iter  90 value 3727.750446
## iter 100 value 3727.087426
## final  value 3727.087426 
## stopped after 100 iterations
## # weights:  221
## initial  value 6223.342447 
## iter  10 value 3955.217098
## iter  20 value 3818.442258
## iter  30 value 3813.421305
## iter  40 value 3771.707462
## iter  50 value 3736.029807
## iter  60 value 3632.809247
## iter  70 value 2913.942291
## iter  80 value 2647.253635
## iter  90 value 2588.944170
## iter 100 value 2549.036549
## final  value 2549.036549 
## stopped after 100 iterations
## # weights:  331
## initial  value 9085.188073 
## iter  10 value 4082.543284
## iter  20 value 4082.113294
## iter  30 value 3842.205736
## iter  40 value 3815.874625
## iter  50 value 3808.974865
## iter  60 value 3761.939340
## iter  70 value 3743.463180
## iter  80 value 3734.212489
## iter  90 value 3728.382805
## iter 100 value 3723.717871
## final  value 3723.717871 
## stopped after 100 iterations
## # weights:  551
## initial  value 6178.194311 
## iter  10 value 4040.383968
## iter  20 value 3934.259973
## iter  30 value 3922.990568
## iter  40 value 3920.209484
## iter  50 value 3816.068530
## iter  60 value 3774.172993
## iter  70 value 3769.672907
## iter  80 value 3708.567315
## iter  90 value 3566.802374
## iter 100 value 3323.126554
## final  value 3323.126554 
## stopped after 100 iterations
## # weights:  771
## initial  value 9089.230103 
## iter  10 value 4060.475745
## iter  20 value 3803.913792
## iter  30 value 3740.215273
## iter  40 value 3698.896549
## iter  50 value 3599.484515
## iter  60 value 3485.602587
## iter  70 value 3174.190943
## iter  80 value 2765.455482
## iter  90 value 2600.908599
## iter 100 value 2463.561005
## final  value 2463.561005 
## stopped after 100 iterations
## # weights:  221
## initial  value 4496.364181 
## iter  10 value 4082.831924
## iter  20 value 3905.641377
## iter  30 value 3808.390514
## iter  40 value 3764.442209
## iter  50 value 3703.971843
## iter  60 value 3508.043469
## iter  70 value 3285.345028
## iter  80 value 2895.756325
## iter  90 value 2759.482187
## iter 100 value 2677.279603
## final  value 2677.279603 
## stopped after 100 iterations
## # weights:  331
## initial  value 6718.664622 
## iter  10 value 3996.695267
## iter  20 value 3882.365605
## iter  30 value 3809.854339
## iter  40 value 3738.372982
## iter  50 value 3643.473511
## iter  60 value 3121.196685
## iter  70 value 2664.486378
## iter  80 value 2624.555307
## iter  90 value 2571.585237
## iter 100 value 2502.716716
## final  value 2502.716716 
## stopped after 100 iterations
## # weights:  551
## initial  value 7380.678265 
## iter  10 value 3855.505147
## iter  20 value 3773.692743
## iter  30 value 3689.249757
## iter  40 value 3544.052897
## iter  50 value 3418.691917
## iter  60 value 3295.138228
## iter  70 value 3253.934617
## iter  80 value 3055.358845
## iter  90 value 2874.229190
## iter 100 value 2817.840818
## final  value 2817.840818 
## stopped after 100 iterations
## # weights:  771
## initial  value 4590.139347 
## iter  10 value 4034.644500
## iter  20 value 3840.001780
## iter  30 value 3804.295174
## iter  40 value 3720.367578
## iter  50 value 3686.643967
## iter  60 value 3626.885537
## iter  70 value 3366.394589
## iter  80 value 3083.565173
## iter  90 value 2639.047180
## iter 100 value 2476.827558
## final  value 2476.827558 
## stopped after 100 iterations
## # weights:  221
## initial  value 4207.730003 
## iter  10 value 4082.012952
## iter  20 value 4081.921824
## final  value 4081.920666 
## converged
## # weights:  331
## initial  value 4475.383261 
## iter  10 value 3908.900743
## iter  20 value 3828.086162
## iter  30 value 3785.148280
## iter  40 value 3762.043731
## iter  50 value 3751.673952
## iter  60 value 3749.836138
## iter  70 value 3743.668912
## iter  80 value 3743.122154
## iter  90 value 3740.635855
## iter 100 value 3736.826256
## final  value 3736.826256 
## stopped after 100 iterations
## # weights:  551
## initial  value 5689.053098 
## iter  10 value 3804.821458
## iter  20 value 3728.690682
## iter  30 value 3690.651929
## iter  40 value 3635.726177
## iter  50 value 3462.349271
## iter  60 value 3286.900173
## iter  70 value 3105.164142
## iter  80 value 2676.477266
## iter  90 value 2442.431456
## iter 100 value 2338.344287
## final  value 2338.344287 
## stopped after 100 iterations
## # weights:  771
## initial  value 7807.700386 
## final  value 4081.750420 
## converged
## # weights:  221
## initial  value 8131.142158 
## iter  10 value 3791.370932
## iter  20 value 3763.441110
## iter  30 value 3762.353982
## iter  40 value 3762.309245
## final  value 3762.309089 
## converged
## # weights:  331
## initial  value 8192.078947 
## iter  10 value 3972.203193
## iter  20 value 3768.172303
## iter  30 value 3737.419816
## iter  40 value 3288.862836
## iter  50 value 2826.048806
## iter  60 value 2632.029180
## iter  70 value 2597.225872
## iter  80 value 2490.832581
## iter  90 value 2452.986066
## iter 100 value 2414.083114
## final  value 2414.083114 
## stopped after 100 iterations
## # weights:  551
## initial  value 4125.200094 
## iter  10 value 4063.861516
## iter  20 value 3867.144815
## iter  30 value 3830.535774
## iter  40 value 3701.743527
## iter  50 value 3483.888867
## iter  60 value 3208.265277
## iter  70 value 2635.220850
## iter  80 value 2495.172387
## iter  90 value 2407.398453
## iter 100 value 2376.738774
## final  value 2376.738774 
## stopped after 100 iterations
## # weights:  771
## initial  value 5422.578376 
## iter  10 value 4069.522731
## iter  20 value 3889.263104
## iter  30 value 3430.805763
## iter  40 value 3186.929397
## iter  50 value 2843.902184
## iter  60 value 2795.277351
## iter  70 value 2690.908599
## iter  80 value 2653.880944
## iter  90 value 2632.614614
## iter 100 value 2576.365964
## final  value 2576.365964 
## stopped after 100 iterations
## # weights:  221
## initial  value 5926.801002 
## iter  10 value 4080.844926
## iter  20 value 4070.640191
## iter  30 value 3797.540431
## iter  40 value 3786.783734
## iter  50 value 3617.975538
## iter  60 value 3338.522930
## iter  70 value 3263.627306
## iter  80 value 3190.614989
## iter  90 value 3054.237061
## iter 100 value 2797.254140
## final  value 2797.254140 
## stopped after 100 iterations
## # weights:  331
## initial  value 7343.751275 
## iter  10 value 4034.308925
## iter  20 value 3767.539698
## iter  30 value 3672.521884
## iter  40 value 3362.832872
## iter  50 value 3019.600773
## iter  60 value 2651.204333
## iter  70 value 2501.969169
## iter  80 value 2464.661990
## iter  90 value 2405.899350
## iter 100 value 2387.633689
## final  value 2387.633689 
## stopped after 100 iterations
## # weights:  551
## initial  value 5748.819906 
## iter  10 value 3998.438471
## iter  20 value 3811.921179
## iter  30 value 3780.170725
## iter  40 value 3758.960497
## iter  50 value 3752.140999
## iter  60 value 3751.842706
## iter  70 value 3738.291923
## iter  80 value 3702.783840
## iter  90 value 3586.474064
## iter 100 value 3195.829757
## final  value 3195.829757 
## stopped after 100 iterations
## # weights:  771
## initial  value 4316.995865 
## iter  10 value 4081.889288
## final  value 4081.886060 
## converged
## # weights:  551
## initial  value 7881.622535 
## iter  10 value 5730.296369
## iter  20 value 5664.176911
## iter  30 value 5626.830601
## iter  40 value 5564.105803
## iter  50 value 5527.942097
## iter  60 value 5147.529175
## iter  70 value 4587.563972
## iter  80 value 4021.236043
## iter  90 value 3724.097789
## iter 100 value 3647.224599
## final  value 3647.224599 
## stopped after 100 iterations
# Show the resampling results and the selected tuning values for the n4 fit
print(Adult_TDA_KDE_5.60.5_n4_NN1Fit0)
## Neural Network 
## 
## 11795 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 7864, 7863, 7863 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa     
##   2     0.3    0.8197537  0.25489444
##   2     0.5    0.8150887  0.24152677
##   2     0.7    0.8192426  0.34595694
##   3     0.3    0.8111086  0.23362743
##   3     0.5    0.8404417  0.44133030
##   3     0.7    0.8413710  0.42815038
##   5     0.3    0.8425615  0.44621056
##   5     0.5    0.8290788  0.40330614
##   5     0.7    0.8408655  0.44821064
##   7     0.3    0.7965231  0.07841179
##   7     0.5    0.8400148  0.44721166
##   7     0.7    0.8178032  0.27371827
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 5 and decay = 0.3.
# Per-fold Accuracy and Kappa for the selected n4 model
Adult_TDA_KDE_5.60.5_n4_NN1Fit0[["resample"]]
##    Accuracy     Kappa Resample
## 1 0.8560163 0.5501062    Fold1
## 2 0.8530010 0.5447380    Fold3
## 3 0.8186673 0.2437874    Fold2
# Keep only the first resample column (Accuracy) as a one-column data frame
ad_tda_kde_5.60.5_n4_nn1_fit_re <-
  Adult_TDA_KDE_5.60.5_n4_NN1Fit0[["resample"]][1]

# Print the fitted network's structure and weights
summary(Adult_TDA_KDE_5.60.5_n4_NN1Fit0)
## a 108-5-1 network with 551 weights
## options were - entropy fitting  decay=0.3
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.00     0.00     0.00     0.01     0.00     0.00     0.00     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##    -0.01     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##     0.00     0.00     0.00     0.05     0.00     0.00     0.00     0.00 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h3   i1->h3   i2->h3   i3->h3   i4->h3   i5->h3   i6->h3   i7->h3 
##    -0.34    -1.30     0.31    -0.05    -0.07     0.00    -0.61     0.00 
##   i8->h3   i9->h3  i10->h3  i11->h3  i12->h3  i13->h3  i14->h3  i15->h3 
##     0.08    -0.01     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h3  i17->h3  i18->h3  i19->h3  i20->h3  i21->h3  i22->h3  i23->h3 
##     0.00     0.00     0.00    -0.02     0.02    -0.01     0.00    -0.17 
##  i24->h3  i25->h3  i26->h3  i27->h3  i28->h3  i29->h3  i30->h3  i31->h3 
##     0.00     0.00     0.00    -0.16    -3.29    -0.10     0.00     0.58 
##  i32->h3  i33->h3  i34->h3  i35->h3  i36->h3  i37->h3  i38->h3  i39->h3 
##     0.00    -1.13     0.33    -0.01     0.31    -0.55     0.00    -0.12 
##  i40->h3  i41->h3  i42->h3  i43->h3  i44->h3  i45->h3  i46->h3  i47->h3 
##     0.32     0.07    -0.01     0.49    -0.88     0.18     0.36     0.00 
##  i48->h3  i49->h3  i50->h3  i51->h3  i52->h3  i53->h3  i54->h3  i55->h3 
##    -0.66     0.20    -0.05     0.84    -0.73    -0.02     0.00    -0.24 
##  i56->h3  i57->h3  i58->h3  i59->h3  i60->h3  i61->h3  i62->h3  i63->h3 
##    -0.19    -0.02     0.11    -0.18     0.00    -0.24    -0.93     0.59 
##  i64->h3  i65->h3  i66->h3  i67->h3  i68->h3  i69->h3  i70->h3  i71->h3 
##     0.03     0.05    -1.70    -0.11     0.00     0.33     0.00     0.00 
##  i72->h3  i73->h3  i74->h3  i75->h3  i76->h3  i77->h3  i78->h3  i79->h3 
##     0.00     0.00     0.00     0.00    -0.01     0.00     0.26    -0.01 
##  i80->h3  i81->h3  i82->h3  i83->h3  i84->h3  i85->h3  i86->h3  i87->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.13     0.00 
##  i88->h3  i89->h3  i90->h3  i91->h3  i92->h3  i93->h3  i94->h3  i95->h3 
##     0.12    -0.34    -0.01     0.00     0.00     0.00     0.00     0.00 
##  i96->h3  i97->h3  i98->h3  i99->h3 i100->h3 i101->h3 i102->h3 i103->h3 
##     0.00     0.00     0.00     0.00     0.11     0.00     0.00     0.00 
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3 
##     0.00     0.00    -0.82     0.00     0.00 
##    b->h4   i1->h4   i2->h4   i3->h4   i4->h4   i5->h4   i6->h4   i7->h4 
##    -1.65     0.06    -0.27     0.71    -0.14    -0.20    -0.10     0.05 
##   i8->h4   i9->h4  i10->h4  i11->h4  i12->h4  i13->h4  i14->h4  i15->h4 
##    -0.75    -0.46    -0.49     0.00     0.00     0.00    -2.47     0.00 
##  i16->h4  i17->h4  i18->h4  i19->h4  i20->h4  i21->h4  i22->h4  i23->h4 
##     0.00     0.00     0.00     0.13    -0.15     1.40     0.00    -1.24 
##  i24->h4  i25->h4  i26->h4  i27->h4  i28->h4  i29->h4  i30->h4  i31->h4 
##     1.43     0.00     0.00    -0.74    -0.32    -5.95     1.55     1.53 
##  i32->h4  i33->h4  i34->h4  i35->h4  i36->h4  i37->h4  i38->h4  i39->h4 
##     0.48    -0.09    -0.15     0.98    -0.47     0.11    -0.48    -0.06 
##  i40->h4  i41->h4  i42->h4  i43->h4  i44->h4  i45->h4  i46->h4  i47->h4 
##     0.63    -0.86    -1.14    -0.11    -0.47    -0.87     0.77     0.60 
##  i48->h4  i49->h4  i50->h4  i51->h4  i52->h4  i53->h4  i54->h4  i55->h4 
##     0.27     0.74    -0.32     0.54     0.17    -1.68    -0.85    -1.97 
##  i56->h4  i57->h4  i58->h4  i59->h4  i60->h4  i61->h4  i62->h4  i63->h4 
##     2.14    -0.88     0.56    -0.29    -1.05     0.02    -1.33    -0.32 
##  i64->h4  i65->h4  i66->h4  i67->h4  i68->h4  i69->h4  i70->h4  i71->h4 
##     0.00     0.00     0.03     0.20     0.87     0.45    -0.15    -0.63 
##  i72->h4  i73->h4  i74->h4  i75->h4  i76->h4  i77->h4  i78->h4  i79->h4 
##     1.94    -0.26    -0.62    -0.07     0.19     1.08     0.71    -1.21 
##  i80->h4  i81->h4  i82->h4  i83->h4  i84->h4  i85->h4  i86->h4  i87->h4 
##    -0.52    -0.35     0.00    -0.07    -0.50    -0.90    -0.27     0.39 
##  i88->h4  i89->h4  i90->h4  i91->h4  i92->h4  i93->h4  i94->h4  i95->h4 
##     1.87    -0.12     0.93     0.18    -0.81    -0.32    -0.40    -0.05 
##  i96->h4  i97->h4  i98->h4  i99->h4 i100->h4 i101->h4 i102->h4 i103->h4 
##    -0.37     0.78    -0.71     0.44    -0.35    -0.54    -0.24    -1.06 
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4 
##     0.16    -0.77     0.46    -0.63    -0.34 
##    b->h5   i1->h5   i2->h5   i3->h5   i4->h5   i5->h5   i6->h5   i7->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h5   i9->h5  i10->h5  i11->h5  i12->h5  i13->h5  i14->h5  i15->h5 
##     0.00     0.00     0.00    -0.78     0.00     0.00     0.00     0.00 
##  i16->h5  i17->h5  i18->h5  i19->h5  i20->h5  i21->h5  i22->h5  i23->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h5  i25->h5  i26->h5  i27->h5  i28->h5  i29->h5  i30->h5  i31->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h5  i33->h5  i34->h5  i35->h5  i36->h5  i37->h5  i38->h5  i39->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h5  i41->h5  i42->h5  i43->h5  i44->h5  i45->h5  i46->h5  i47->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h5  i49->h5  i50->h5  i51->h5  i52->h5  i53->h5  i54->h5  i55->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h5  i57->h5  i58->h5  i59->h5  i60->h5  i61->h5  i62->h5  i63->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h5  i65->h5  i66->h5  i67->h5  i68->h5  i69->h5  i70->h5  i71->h5 
##     0.14     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h5  i73->h5  i74->h5  i75->h5  i76->h5  i77->h5  i78->h5  i79->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h5  i81->h5  i82->h5  i83->h5  i84->h5  i85->h5  i86->h5  i87->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h5  i89->h5  i90->h5  i91->h5  i92->h5  i93->h5  i94->h5  i95->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h5  i97->h5  i98->h5  i99->h5 i100->h5 i101->h5 i102->h5 i103->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5 
##     0.00     0.00     0.00     0.00     0.00 
##  b->o h1->o h2->o h3->o h4->o h5->o 
## -3.62  0.09 -0.21  4.49  5.01  0.77
# Variable-importance plot for Adult_TDA_KDE_5.60.5_n4_NN1Fit0, limited to
# the 50 highest-ranked features. The feature count is passed by name and the
# plot title spelling ("Assisted") is corrected.
vip(Adult_TDA_KDE_5.60.5_n4_NN1Fit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.60.5_n4_NN1Fit TDA-Assisted NN")

# Score the test data with the model that was fitted on the training data.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n4_NN1Fit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate predicted vs. observed classes to assess test-set performance.
ad_tda_kde_5.60.5_n4_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n4_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6295   826
##      >50K    1121  1526
##                                           
##                Accuracy : 0.8007          
##                  95% CI : (0.7926, 0.8086)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.4772          
##                                           
##  Mcnemar's Test P-Value : 2.684e-11       
##                                           
##             Sensitivity : 0.8488          
##             Specificity : 0.6488          
##          Pos Pred Value : 0.8840          
##          Neg Pred Value : 0.5765          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6445          
##    Detection Prevalence : 0.7290          
##       Balanced Accuracy : 0.7488          
##                                           
##        'Positive' Class :  <=50K          
## 
# Display the test-set confusion matrix object once more.
print(ad_tda_kde_5.60.5_n4_nn1_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6295   826
##      >50K    1121  1526
##                                           
##                Accuracy : 0.8007          
##                  95% CI : (0.7926, 0.8086)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.4772          
##                                           
##  Mcnemar's Test P-Value : 2.684e-11       
##                                           
##             Sensitivity : 0.8488          
##             Specificity : 0.6488          
##          Pos Pred Value : 0.8840          
##          Neg Pred Value : 0.5765          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6445          
##    Detection Prevalence : 0.7290          
##       Balanced Accuracy : 0.7488          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the confusion matrix (accuracy, kappa, CI bounds, ...).
ad_tda_kde_5.60.5_n4_nn1_cf0[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.006757e-01   4.772141e-01   7.926130e-01   8.085587e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   7.146690e-23   2.684466e-11
# Keep the overall test-set accuracy (the first entry of `overall`, named
# "Accuracy") for the later comparison against the baseline model.
ad_tda_kde_5.60.5_n4_nn1_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n4_nn1_cf0[["overall"]]["Accuracy"]
# Show the per-class statistics of the same confusion matrix.
ad_tda_kde_5.60.5_n4_nn1_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##            0.8488403            0.6488095            0.8840051 
##       Neg Pred Value            Precision               Recall 
##            0.5765017            0.8840051            0.8488403 
##                   F1           Prevalence       Detection Rate 
##            0.8660659            0.7592138            0.6444513 
## Detection Prevalence    Balanced Accuracy 
##            0.7290131            0.7488249
# Store precision, recall and F1, which are entries 5 to 7 of `byClass`.
ad_tda_kde_5.60.5_n4_nn1_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n4_nn1_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers

### 3-fold diff

# Per-fold accuracy difference: ad_nn1_fit_re minus the TDA-assisted n4 fit
# (ad_nn1_fit_re is presumably the non-TDA NN resample accuracies - confirm).
# NOTE(review): rows are subtracted positionally; verify that both resample
# tables list the folds in the same order before treating these as paired.
diff_tda_kde_5.60.5_nn1_n4_3_fold <-
  ad_nn1_fit_re - ad_tda_kde_5.60.5_n4_nn1_fit_re
diff_tda_kde_5.60.5_nn1_n4_3_fold
##       Accuracy
## 1 -0.048065774
## 2 -0.002908888
## 3 -0.017272241
## Bayesian Tests 3-fold diff

# Bayesian sign test on the 3-fold accuracy differences.
# The -0.01 / 0.01 arguments are presumably the ROPE bounds - confirm against
# the definition of BayesianSignTest().
bst_tda_kde_5.60.5_nn1.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nn1_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_nn1.n4_3_fold
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test (3-fold).
# NOTE: probRight is 0 in the recorded run, so this ratio evaluates to Inf.
bst_tda_kde_5.60.5_nn1.n4_3_fold_odds.left <-
  bst_tda_kde_5.60.5_nn1.n4_3_fold[["probLeft"]] /
  bst_tda_kde_5.60.5_nn1.n4_3_fold[["probRight"]]
bst_tda_kde_5.60.5_nn1.n4_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the 3-fold accuracy differences, called with
# the same -0.01 / 0.01 bounds as the sign test.
bsr_tda_kde_5.60.5_nn1.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nn1_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_nn1.n4_3_fold
## $winLeft
## [1] 0.7835333
## 
## $winRope
## [1] 0.2164667
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the 3-fold accuracy differences.
# The 0.1 argument is presumably the assumed correlation between folds and
# -0.01 / 0.01 the ROPE bounds - confirm against correlatedBayesianTtest().
bct_tda_kde_5.60.5_nn1.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nn1_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_nn1.n4_3_fold
## $left
## [1] 0.7528292
## 
## $rope
## [1] 0.1636701
## 
## $right
## [1] 0.08350075
# ROPE plot: share of the 3-fold differences falling inside [-0.01, 0.01].
plot(
  rope(diff_tda_kde_5.60.5_nn1_n4_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.60.5_nn1.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nn1_n4_3_fold))
#bf_tda_kde_5.60.5_nn1.n4_3_fold

# Frequentist one-sample t-test of the 3-fold accuracy differences against 0.
t.test(x = as.matrix(diff_tda_kde_5.60.5_nn1_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.60.5_nn1_n4_3_fold)
## t = -1.7079, df = 2, p-value = 0.2298
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.08006110  0.03456316
## sample estimates:
##   mean of x 
## -0.02274897
### Test set diff
# Single test-set accuracy difference: nn1_cf_ov_acc minus the accuracy of the
# TDA-assisted n4 model.
diff_tda_kde_5.60.5_nn1.n4_test <-
  nn1_cf_ov_acc - ad_tda_kde_5.60.5_n4_nn1_cf0_ov_acc
diff_tda_kde_5.60.5_nn1.n4_test
##    Accuracy 
## 0.004402129
## Bayesian Tests Test set diff

# Bayesian sign test on the single test-set accuracy difference, called with
# -0.01 / 0.01 (presumably the ROPE bounds - confirm).
bst_tda_kde_5.60.5_nn1.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nn1.n4_test),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_nn1.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test (test set).
# NOTE: probLeft and probRight are both 0 in the recorded run, so this ratio
# is 0 / 0 and evaluates to NaN.
bst_tda_kde_5.60.5_nn1.n4_test_odds.left <-
  bst_tda_kde_5.60.5_nn1.n4_test[["probLeft"]] /
  bst_tda_kde_5.60.5_nn1.n4_test[["probRight"]]
bst_tda_kde_5.60.5_nn1.n4_test_odds.left
## [1] NaN
# Bayesian signed-rank test on the single test-set accuracy difference,
# called with the same -0.01 / 0.01 bounds as the sign test.
bsr_tda_kde_5.60.5_nn1.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nn1.n4_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_nn1.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the single test-set accuracy difference.
# NOTE(review): the recorded run returns NA for left/rope/right, presumably
# because one observation has no sample variance - confirm whether this test
# is meaningful here.
bct_tda_kde_5.60.5_nn1.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nn1.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_nn1.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_nn1.n4_test)))

#BayesFactor
#bf_tda_kde_5.60.5_nn1.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nn1.n4_test)) #bf_tda_pca_5.60.5_nn1.n4_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_nn1.n4_test))

##Node5

#Neural Network 1

# Tune a neural network (caret method "nnet") on the node-5 TDA/KDE training
# subset. Resampling is governed by `fitControl`, candidate size/decay values
# come from `nn1Grid`, and the best model is chosen by accuracy.
# `TRUE` replaces the reassignable shorthand `T`.
# NOTE(review): `Importance` is forwarded through `...`; it does not appear to
# be a documented nnet() argument - confirm it has any effect.
Adult_TDA_KDE_5.60.5_n5_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n5.vec,
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 3832.727811 
## iter  10 value 2564.310250
## iter  20 value 2516.118965
## iter  30 value 2475.935520
## iter  40 value 2404.990071
## iter  50 value 2372.575012
## iter  60 value 2367.968005
## iter  70 value 2322.722369
## iter  80 value 2212.547266
## iter  90 value 2088.824549
## iter 100 value 1825.115690
## final  value 1825.115690 
## stopped after 100 iterations
## # weights:  331
## initial  value 2703.389364 
## iter  10 value 2564.221496
## iter  20 value 2559.804802
## iter  30 value 2411.445858
## iter  40 value 2385.896403
## iter  50 value 2379.206664
## iter  60 value 2365.512802
## iter  70 value 2356.540036
## iter  80 value 2173.303728
## iter  90 value 1856.810119
## iter 100 value 1739.576524
## final  value 1739.576524 
## stopped after 100 iterations
## # weights:  551
## initial  value 2793.244928 
## iter  10 value 2564.096400
## iter  20 value 2523.084205
## iter  30 value 2383.554163
## iter  40 value 2370.279531
## iter  50 value 2368.591185
## iter  60 value 2361.753001
## iter  70 value 2348.035615
## iter  80 value 2298.216706
## iter  90 value 2229.921359
## iter 100 value 2113.488105
## final  value 2113.488105 
## stopped after 100 iterations
## # weights:  771
## initial  value 3688.374704 
## iter  10 value 2546.707711
## iter  20 value 2364.509802
## iter  30 value 2319.807610
## iter  40 value 2263.335921
## iter  50 value 2154.009726
## iter  60 value 1771.578894
## iter  70 value 1662.348358
## iter  80 value 1620.225854
## iter  90 value 1604.021816
## iter 100 value 1590.017819
## final  value 1590.017819 
## stopped after 100 iterations
## # weights:  221
## initial  value 5826.627581 
## iter  10 value 2552.330472
## iter  20 value 2407.339451
## iter  30 value 2346.661684
## iter  40 value 2338.248071
## iter  50 value 2240.339117
## iter  60 value 1955.390891
## iter  70 value 1762.235280
## iter  80 value 1656.887573
## iter  90 value 1602.736186
## iter 100 value 1589.830429
## final  value 1589.830429 
## stopped after 100 iterations
## # weights:  331
## initial  value 7226.566088 
## iter  10 value 2561.925287
## iter  20 value 2400.723479
## iter  30 value 2397.826297
## iter  40 value 2397.154362
## iter  50 value 2396.238741
## iter  60 value 2390.722635
## iter  70 value 2388.181744
## iter  80 value 2388.153327
## iter  80 value 2388.153325
## final  value 2388.153299 
## converged
## # weights:  551
## initial  value 2705.545308 
## iter  10 value 2425.622727
## iter  20 value 2397.368795
## iter  30 value 2397.074456
## iter  40 value 2364.230838
## iter  50 value 2360.834221
## iter  60 value 2259.574561
## iter  70 value 1996.706693
## iter  80 value 1861.300070
## iter  90 value 1801.442000
## iter 100 value 1762.345291
## final  value 1762.345291 
## stopped after 100 iterations
## # weights:  771
## initial  value 7437.506588 
## iter  10 value 2552.328232
## iter  20 value 2372.143916
## iter  30 value 2250.951568
## iter  40 value 2080.374070
## iter  50 value 2071.716189
## iter  60 value 1817.124230
## iter  70 value 1732.273522
## iter  80 value 1640.969745
## iter  90 value 1629.630393
## iter 100 value 1587.105265
## final  value 1587.105265 
## stopped after 100 iterations
## # weights:  221
## initial  value 2873.862304 
## iter  10 value 2519.553020
## iter  20 value 2406.824068
## iter  30 value 2399.183354
## iter  40 value 2378.168235
## iter  50 value 2107.998064
## iter  60 value 1883.674033
## iter  70 value 1783.465410
## iter  80 value 1753.758637
## iter  90 value 1696.683696
## iter 100 value 1638.035875
## final  value 1638.035875 
## stopped after 100 iterations
## # weights:  331
## initial  value 3197.929095 
## iter  10 value 2419.936176
## iter  20 value 2400.542767
## iter  30 value 2392.547347
## iter  40 value 2391.412604
## iter  50 value 2390.394869
## iter  60 value 2335.777613
## iter  70 value 2257.747147
## iter  80 value 2178.120718
## iter  90 value 1817.255441
## iter 100 value 1738.742349
## final  value 1738.742349 
## stopped after 100 iterations
## # weights:  551
## initial  value 5209.785239 
## iter  10 value 2564.109830
## iter  20 value 2509.242501
## iter  30 value 2356.965339
## iter  40 value 2349.624596
## iter  50 value 2342.573014
## iter  60 value 2336.875147
## iter  70 value 2003.949480
## iter  80 value 1875.510520
## iter  90 value 1800.224539
## iter 100 value 1680.419436
## final  value 1680.419436 
## stopped after 100 iterations
## # weights:  771
## initial  value 2700.837876 
## iter  10 value 2520.552416
## iter  20 value 2472.698789
## iter  30 value 2417.170080
## iter  40 value 2323.011005
## iter  50 value 2295.138509
## iter  60 value 2175.422066
## iter  70 value 2153.066059
## iter  80 value 1967.050792
## iter  90 value 1707.785679
## iter 100 value 1682.667698
## final  value 1682.667698 
## stopped after 100 iterations
## # weights:  221
## initial  value 4556.577192 
## final  value 2564.438353 
## converged
## # weights:  331
## initial  value 4197.702688 
## iter  10 value 2470.900844
## iter  20 value 2388.650297
## final  value 2353.753615 
## converged
## # weights:  551
## initial  value 6616.076159 
## iter  10 value 2564.221494
## final  value 2564.221438 
## converged
## # weights:  771
## initial  value 2948.858342 
## iter  10 value 2529.971796
## iter  20 value 2375.511579
## iter  30 value 2344.357552
## iter  40 value 2338.294327
## iter  50 value 2231.809890
## iter  60 value 2088.607450
## iter  70 value 1969.780668
## iter  80 value 1833.983472
## iter  90 value 1731.200582
## iter 100 value 1624.994021
## final  value 1624.994021 
## stopped after 100 iterations
## # weights:  221
## initial  value 4850.067606 
## iter  10 value 2567.914909
## iter  20 value 2564.765927
## iter  30 value 2564.727648
## iter  40 value 2508.980634
## iter  50 value 2439.105745
## iter  60 value 2439.045829
## iter  70 value 2397.157773
## iter  80 value 2388.439861
## iter  90 value 2385.118470
## iter 100 value 2379.069785
## final  value 2379.069785 
## stopped after 100 iterations
## # weights:  331
## initial  value 6617.532237 
## iter  10 value 2553.667477
## iter  20 value 2542.751364
## iter  30 value 2391.201696
## iter  40 value 2385.883431
## iter  50 value 2380.021530
## iter  60 value 2350.266668
## iter  70 value 2341.836919
## iter  80 value 2313.973960
## iter  90 value 2243.170364
## iter 100 value 2126.031763
## final  value 2126.031763 
## stopped after 100 iterations
## # weights:  551
## initial  value 6366.261703 
## iter  10 value 2574.035660
## iter  20 value 2567.653201
## iter  30 value 2553.074086
## iter  40 value 2483.349960
## iter  50 value 2401.854394
## iter  60 value 2387.350490
## iter  70 value 2386.586362
## iter  80 value 2385.213577
## iter  90 value 2385.113710
## iter 100 value 2385.107900
## final  value 2385.107900 
## stopped after 100 iterations
## # weights:  771
## initial  value 3079.206095 
## iter  10 value 2462.033309
## iter  20 value 2375.769261
## iter  30 value 2357.094768
## iter  40 value 2346.654017
## iter  50 value 2342.650084
## iter  60 value 2325.223074
## iter  70 value 2304.810168
## iter  80 value 2156.372249
## iter  90 value 1901.890183
## iter 100 value 1811.752571
## final  value 1811.752571 
## stopped after 100 iterations
## # weights:  221
## initial  value 3516.600127 
## iter  10 value 2555.981337
## iter  20 value 2389.248524
## iter  30 value 2369.298328
## iter  40 value 2338.966376
## iter  50 value 2244.293065
## iter  60 value 1884.550963
## iter  70 value 1738.048126
## iter  80 value 1643.445959
## iter  90 value 1628.319627
## iter 100 value 1624.667813
## final  value 1624.667813 
## stopped after 100 iterations
## # weights:  331
## initial  value 5438.767376 
## iter  10 value 2557.404484
## iter  20 value 2430.824001
## iter  30 value 2344.072029
## iter  40 value 2282.868879
## iter  50 value 2107.776216
## iter  60 value 1922.868554
## iter  70 value 1816.383589
## iter  80 value 1722.019789
## iter  90 value 1659.144892
## iter 100 value 1647.958271
## final  value 1647.958271 
## stopped after 100 iterations
## # weights:  551
## initial  value 3601.858793 
## iter  10 value 2568.615797
## iter  20 value 2565.058976
## iter  30 value 2565.016849
## final  value 2565.016147 
## converged
## # weights:  771
## initial  value 7267.488662 
## iter  10 value 2452.100877
## iter  20 value 2384.423671
## iter  30 value 2360.747222
## iter  40 value 2342.020281
## iter  50 value 2317.176913
## iter  60 value 2102.612601
## iter  70 value 2048.356862
## iter  80 value 1856.534069
## iter  90 value 1738.548655
## iter 100 value 1692.650427
## final  value 1692.650427 
## stopped after 100 iterations
## # weights:  221
## initial  value 3399.647946 
## iter  10 value 2497.312703
## iter  20 value 2376.880990
## iter  30 value 2343.044755
## iter  40 value 2146.683620
## iter  50 value 1827.372332
## iter  60 value 1768.648830
## iter  70 value 1733.868905
## iter  80 value 1689.599578
## iter  90 value 1656.559301
## iter 100 value 1629.993888
## final  value 1629.993888 
## stopped after 100 iterations
## # weights:  331
## initial  value 3930.764042 
## iter  10 value 2552.909953
## iter  20 value 2376.979030
## iter  30 value 2345.274266
## iter  40 value 2307.668222
## iter  50 value 1970.558030
## iter  60 value 1851.521300
## iter  70 value 1836.050868
## iter  80 value 1813.762086
## iter  90 value 1805.144220
## iter 100 value 1729.338864
## final  value 1729.338864 
## stopped after 100 iterations
## # weights:  551
## initial  value 4191.956408 
## iter  10 value 2390.501717
## iter  20 value 2367.617275
## iter  30 value 2248.483308
## iter  40 value 2135.640326
## iter  50 value 1833.965293
## iter  60 value 1681.148291
## iter  70 value 1654.805708
## iter  80 value 1624.432064
## iter  90 value 1606.469224
## iter 100 value 1604.423763
## final  value 1604.423763 
## stopped after 100 iterations
## # weights:  771
## initial  value 3346.707581 
## iter  10 value 2382.197544
## iter  20 value 2373.366687
## iter  30 value 2343.656505
## iter  40 value 2302.167935
## iter  50 value 2281.306619
## iter  60 value 2243.369266
## iter  70 value 1841.459603
## iter  80 value 1745.231063
## iter  90 value 1671.109794
## iter 100 value 1649.230432
## final  value 1649.230432 
## stopped after 100 iterations
## # weights:  221
## initial  value 4245.111206 
## iter  10 value 2560.762130
## iter  20 value 2381.422780
## iter  30 value 2380.093593
## iter  40 value 2380.069384
## iter  50 value 2379.159996
## iter  60 value 2374.015396
## iter  70 value 2371.720807
## final  value 2371.677109 
## converged
## # weights:  331
## initial  value 6190.790887 
## iter  10 value 2551.796125
## iter  20 value 2359.601200
## iter  30 value 2342.397432
## iter  40 value 2271.677575
## iter  50 value 1986.590424
## iter  60 value 1777.770018
## iter  70 value 1689.919743
## iter  80 value 1663.812898
## iter  90 value 1614.655194
## iter 100 value 1604.839477
## final  value 1604.839477 
## stopped after 100 iterations
## # weights:  551
## initial  value 4104.439054 
## iter  10 value 2566.199351
## iter  20 value 2564.710453
## iter  30 value 2564.364703
## iter  40 value 2564.347843
## iter  50 value 2454.282285
## iter  60 value 2377.284899
## iter  70 value 2372.613549
## iter  80 value 2372.345400
## iter  90 value 2360.266396
## iter 100 value 2281.778536
## final  value 2281.778536 
## stopped after 100 iterations
## # weights:  771
## initial  value 4746.647756 
## iter  10 value 2567.878485
## iter  20 value 2563.554181
## iter  30 value 2448.280775
## iter  40 value 2386.549955
## iter  50 value 2365.083308
## iter  60 value 2362.227628
## iter  70 value 2359.884923
## iter  80 value 2337.979875
## iter  90 value 2318.634065
## iter 100 value 2237.058509
## final  value 2237.058509 
## stopped after 100 iterations
## # weights:  221
## initial  value 5967.236747 
## iter  10 value 2564.724984
## iter  20 value 2459.846500
## iter  30 value 2419.213650
## iter  40 value 2396.402868
## iter  50 value 2388.123379
## iter  60 value 2382.962916
## iter  70 value 2354.090712
## iter  80 value 2347.449266
## iter  90 value 2345.788778
## iter 100 value 2344.263885
## final  value 2344.263885 
## stopped after 100 iterations
## # weights:  331
## initial  value 4379.902355 
## iter  10 value 2568.432841
## iter  20 value 2565.057201
## iter  30 value 2565.016499
## final  value 2565.016029 
## converged
## # weights:  551
## initial  value 5106.974462 
## iter  10 value 2564.509653
## iter  20 value 2518.402045
## iter  30 value 2514.583452
## iter  40 value 2396.673139
## iter  50 value 2292.443606
## iter  60 value 2000.917904
## iter  70 value 1870.571050
## iter  80 value 1801.725193
## iter  90 value 1775.717203
## iter 100 value 1762.646135
## final  value 1762.646135 
## stopped after 100 iterations
## # weights:  771
## initial  value 10440.989925 
## iter  10 value 2577.280097
## iter  20 value 2566.707260
## iter  30 value 2566.563859
## iter  40 value 2540.606789
## iter  50 value 2391.629931
## iter  60 value 2379.208735
## iter  70 value 2375.705543
## iter  80 value 2374.971410
## iter  90 value 2374.933539
## iter 100 value 2374.827884
## final  value 2374.827884 
## stopped after 100 iterations
## # weights:  771
## initial  value 4015.595098 
## iter  10 value 3605.651894
## iter  20 value 3555.159572
## iter  30 value 3483.494918
## iter  40 value 3383.240517
## iter  50 value 3040.171039
## iter  60 value 2746.060329
## iter  70 value 2633.033231
## iter  80 value 2564.219021
## iter  90 value 2444.952872
## iter 100 value 2394.871140
## final  value 2394.871140 
## stopped after 100 iterations
# Show the resampling results and the selected tuning parameters.
print(Adult_TDA_KDE_5.60.5_n5_NN1Fit0)
## Neural Network 
## 
## 8940 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 5960, 5960, 5960 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa    
##   2     0.3    0.8597315  0.2122316
##   2     0.5    0.8664430  0.2608848
##   2     0.7    0.8655481  0.2732793
##   3     0.3    0.8647651  0.2681231
##   3     0.5    0.8665548  0.2670302
##   3     0.7    0.8583893  0.1985636
##   5     0.3    0.8533557  0.1406378
##   5     0.5    0.8583893  0.2499608
##   5     0.7    0.8606264  0.2497128
##   7     0.3    0.8684564  0.3465052
##   7     0.5    0.8667785  0.2494509
##   7     0.7    0.8653244  0.2962923
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 7 and decay = 0.3.
# Per-fold accuracy and kappa of the selected model.
Adult_TDA_KDE_5.60.5_n5_NN1Fit0[["resample"]]
##    Accuracy     Kappa Resample
## 1 0.8681208 0.2562963    Fold2
## 2 0.8731544 0.4324324    Fold1
## 3 0.8640940 0.3507870    Fold3
# Keep the per-fold accuracy as a one-column data frame (first column of the
# resample table, named "Accuracy").
ad_tda_kde_5.60.5_n5_nn1_fit_re <-
  Adult_TDA_KDE_5.60.5_n5_NN1Fit0[["resample"]]["Accuracy"]

# Print the architecture and fitted weights of the final network.
summary(Adult_TDA_KDE_5.60.5_n5_NN1Fit0)
## a 108-7-1 network with 771 weights
## options were - entropy fitting  decay=0.3
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##    -0.02    -0.11     0.04    -0.02    -0.31     0.00     0.08     0.14 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##    -0.01     0.05     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.00     0.00     0.00     0.00     0.18     0.00     0.00    -0.01 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##     0.00     0.00     0.00    -0.19    -0.02    -0.19     0.00     0.09 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.00    -0.05     0.12     0.02     0.04    -0.23     0.00     0.13 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##    -0.29     0.00    -0.03     0.08     0.00     0.00     0.10     0.01 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     0.34    -0.10    -0.07     0.20    -0.14     0.04    -0.01     0.03 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##    -0.14    -0.01     0.08    -0.03    -0.02    -0.04    -0.12     0.09 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.02     0.04    -0.40     0.06     0.00    -0.01     0.00     0.00 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##     0.00     0.00     0.00     0.00     0.01     0.00     0.01    -0.03 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.00     0.00     0.00     0.00     0.00    -0.02     0.00     0.00 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.00     0.00     0.02     0.00     0.00     0.00     0.00     0.00 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.00     0.00    -0.05     0.00     0.00 
##    b->h3   i1->h3   i2->h3   i3->h3   i4->h3   i5->h3   i6->h3   i7->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h3   i9->h3  i10->h3  i11->h3  i12->h3  i13->h3  i14->h3  i15->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h3  i17->h3  i18->h3  i19->h3  i20->h3  i21->h3  i22->h3  i23->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h3  i25->h3  i26->h3  i27->h3  i28->h3  i29->h3  i30->h3  i31->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h3  i33->h3  i34->h3  i35->h3  i36->h3  i37->h3  i38->h3  i39->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h3  i41->h3  i42->h3  i43->h3  i44->h3  i45->h3  i46->h3  i47->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h3  i49->h3  i50->h3  i51->h3  i52->h3  i53->h3  i54->h3  i55->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h3  i57->h3  i58->h3  i59->h3  i60->h3  i61->h3  i62->h3  i63->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h3  i65->h3  i66->h3  i67->h3  i68->h3  i69->h3  i70->h3  i71->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h3  i73->h3  i74->h3  i75->h3  i76->h3  i77->h3  i78->h3  i79->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h3  i81->h3  i82->h3  i83->h3  i84->h3  i85->h3  i86->h3  i87->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h3  i89->h3  i90->h3  i91->h3  i92->h3  i93->h3  i94->h3  i95->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h3  i97->h3  i98->h3  i99->h3 i100->h3 i101->h3 i102->h3 i103->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h4   i1->h4   i2->h4   i3->h4   i4->h4   i5->h4   i6->h4   i7->h4 
##     0.35    -0.05     0.15    -0.70    -0.13     0.20     0.03    -0.08 
##   i8->h4   i9->h4  i10->h4  i11->h4  i12->h4  i13->h4  i14->h4  i15->h4 
##     0.33     0.24     0.31     0.00     0.00     0.00     0.84     0.00 
##  i16->h4  i17->h4  i18->h4  i19->h4  i20->h4  i21->h4  i22->h4  i23->h4 
##     0.00     0.00     0.00     0.00    -0.90     0.00     0.00     0.53 
##  i24->h4  i25->h4  i26->h4  i27->h4  i28->h4  i29->h4  i30->h4  i31->h4 
##     0.00     0.00     0.00    -0.12     0.42     0.51    -1.28    -0.88 
##  i32->h4  i33->h4  i34->h4  i35->h4  i36->h4  i37->h4  i38->h4  i39->h4 
##     0.74     0.78     0.49     0.00     0.35    -0.21     0.38    -0.06 
##  i40->h4  i41->h4  i42->h4  i43->h4  i44->h4  i45->h4  i46->h4  i47->h4 
##    -0.54     0.39     0.40    -0.03     0.41     0.96    -0.63    -0.40 
##  i48->h4  i49->h4  i50->h4  i51->h4  i52->h4  i53->h4  i54->h4  i55->h4 
##     0.01    -0.85     0.16     0.06    -0.05     0.42     0.62     0.48 
##  i56->h4  i57->h4  i58->h4  i59->h4  i60->h4  i61->h4  i62->h4  i63->h4 
##    -1.18     0.82    -0.63    -0.04     0.47    -0.26     0.55    -0.20 
##  i64->h4  i65->h4  i66->h4  i67->h4  i68->h4  i69->h4  i70->h4  i71->h4 
##     0.00     0.00    -0.03    -0.31    -0.49    -0.58    -0.97     0.75 
##  i72->h4  i73->h4  i74->h4  i75->h4  i76->h4  i77->h4  i78->h4  i79->h4 
##    -1.51     0.88    -0.39     0.48    -0.08     0.04    -0.57     0.75 
##  i80->h4  i81->h4  i82->h4  i83->h4  i84->h4  i85->h4  i86->h4  i87->h4 
##    -0.34     0.42     0.00     0.08     0.71     0.49     0.60    -0.91 
##  i88->h4  i89->h4  i90->h4  i91->h4  i92->h4  i93->h4  i94->h4  i95->h4 
##    -0.58     0.36    -1.06     0.94     0.62     0.11     0.62     0.16 
##  i96->h4  i97->h4  i98->h4  i99->h4 i100->h4 i101->h4 i102->h4 i103->h4 
##     0.63    -0.61     0.12    -0.06     0.19     0.58    -0.35     0.36 
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4 
##     0.33     0.67    -0.52    -0.09    -1.09 
##    b->h5   i1->h5   i2->h5   i3->h5   i4->h5   i5->h5   i6->h5   i7->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h5   i9->h5  i10->h5  i11->h5  i12->h5  i13->h5  i14->h5  i15->h5 
##     0.00     0.00     0.00    -0.01     0.00     0.00     0.00     0.00 
##  i16->h5  i17->h5  i18->h5  i19->h5  i20->h5  i21->h5  i22->h5  i23->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h5  i25->h5  i26->h5  i27->h5  i28->h5  i29->h5  i30->h5  i31->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h5  i33->h5  i34->h5  i35->h5  i36->h5  i37->h5  i38->h5  i39->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h5  i41->h5  i42->h5  i43->h5  i44->h5  i45->h5  i46->h5  i47->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h5  i49->h5  i50->h5  i51->h5  i52->h5  i53->h5  i54->h5  i55->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h5  i57->h5  i58->h5  i59->h5  i60->h5  i61->h5  i62->h5  i63->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h5  i65->h5  i66->h5  i67->h5  i68->h5  i69->h5  i70->h5  i71->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h5  i73->h5  i74->h5  i75->h5  i76->h5  i77->h5  i78->h5  i79->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h5  i81->h5  i82->h5  i83->h5  i84->h5  i85->h5  i86->h5  i87->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h5  i89->h5  i90->h5  i91->h5  i92->h5  i93->h5  i94->h5  i95->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h5  i97->h5  i98->h5  i99->h5 i100->h5 i101->h5 i102->h5 i103->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h6   i1->h6   i2->h6   i3->h6   i4->h6   i5->h6   i6->h6   i7->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h6   i9->h6  i10->h6  i11->h6  i12->h6  i13->h6  i14->h6  i15->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h6  i17->h6  i18->h6  i19->h6  i20->h6  i21->h6  i22->h6  i23->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h6  i25->h6  i26->h6  i27->h6  i28->h6  i29->h6  i30->h6  i31->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h6  i33->h6  i34->h6  i35->h6  i36->h6  i37->h6  i38->h6  i39->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h6  i41->h6  i42->h6  i43->h6  i44->h6  i45->h6  i46->h6  i47->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h6  i49->h6  i50->h6  i51->h6  i52->h6  i53->h6  i54->h6  i55->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h6  i57->h6  i58->h6  i59->h6  i60->h6  i61->h6  i62->h6  i63->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h6  i65->h6  i66->h6  i67->h6  i68->h6  i69->h6  i70->h6  i71->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h6  i73->h6  i74->h6  i75->h6  i76->h6  i77->h6  i78->h6  i79->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h6  i81->h6  i82->h6  i83->h6  i84->h6  i85->h6  i86->h6  i87->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h6  i89->h6  i90->h6  i91->h6  i92->h6  i93->h6  i94->h6  i95->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h6  i97->h6  i98->h6  i99->h6 i100->h6 i101->h6 i102->h6 i103->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h6 i105->h6 i106->h6 i107->h6 i108->h6 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h7   i1->h7   i2->h7   i3->h7   i4->h7   i5->h7   i6->h7   i7->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h7   i9->h7  i10->h7  i11->h7  i12->h7  i13->h7  i14->h7  i15->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h7  i17->h7  i18->h7  i19->h7  i20->h7  i21->h7  i22->h7  i23->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h7  i25->h7  i26->h7  i27->h7  i28->h7  i29->h7  i30->h7  i31->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h7  i33->h7  i34->h7  i35->h7  i36->h7  i37->h7  i38->h7  i39->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h7  i41->h7  i42->h7  i43->h7  i44->h7  i45->h7  i46->h7  i47->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h7  i49->h7  i50->h7  i51->h7  i52->h7  i53->h7  i54->h7  i55->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h7  i57->h7  i58->h7  i59->h7  i60->h7  i61->h7  i62->h7  i63->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h7  i65->h7  i66->h7  i67->h7  i68->h7  i69->h7  i70->h7  i71->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h7  i73->h7  i74->h7  i75->h7  i76->h7  i77->h7  i78->h7  i79->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h7  i81->h7  i82->h7  i83->h7  i84->h7  i85->h7  i86->h7  i87->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h7  i89->h7  i90->h7  i91->h7  i92->h7  i93->h7  i94->h7  i95->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h7  i97->h7  i98->h7  i99->h7 i100->h7 i101->h7 i102->h7 i103->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h7 i105->h7 i106->h7 i107->h7 i108->h7 
##     0.00     0.00     0.00     0.00     0.00 
##  b->o h1->o h2->o h3->o h4->o h5->o h6->o h7->o 
##  0.40  0.00  1.54  0.37 -6.40  0.18  0.40  0.41
# Variable-importance plot for the TDA-assisted network, limited to the 50
# highest-ranked features. The title previously read "TDA-Assited"; the
# spelling is corrected so the figure label is right.
vip(Adult_TDA_KDE_5.60.5_n5_NN1Fit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.60.5_n5_NN1Fit TDA-Assisted NN")

# Predict the outcome for the held-out test rows with the model fitted on the
# training data.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n5_NN1Fit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of predicted vs. observed classes on the test data; the
# observed labels are coerced to a factor so they can be compared with the
# predicted factor.
ad_tda_kde_5.60.5_n5_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n5_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6416  1459
##      >50K    1000   893
##                                           
##                Accuracy : 0.7483          
##                  95% CI : (0.7395, 0.7568)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.9943          
##                                           
##                   Kappa : 0.2623          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.8652          
##             Specificity : 0.3797          
##          Pos Pred Value : 0.8147          
##          Neg Pred Value : 0.4717          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6568          
##    Detection Prevalence : 0.8062          
##       Balanced Accuracy : 0.6224          
##                                           
##        'Positive' Class :  <=50K          
## 
# Displays the same confusion-matrix object a second time; the recorded output
# is identical to the print immediately above.
ad_tda_kde_5.60.5_n5_nn1_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6416  1459
##      >50K    1000   893
##                                           
##                Accuracy : 0.7483          
##                  95% CI : (0.7395, 0.7568)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.9943          
##                                           
##                   Kappa : 0.2623          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.8652          
##             Specificity : 0.3797          
##          Pos Pred Value : 0.8147          
##          Neg Pred Value : 0.4717          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6568          
##    Detection Prevalence : 0.8062          
##       Balanced Accuracy : 0.6224          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics of the TDA-assisted network.
ad_tda_kde_5.60.5_n5_nn1_cf0[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.482596e-01   2.623111e-01   7.395283e-01   7.568427e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   9.943459e-01   2.557681e-20
# Element 1 of the overall vector is Accuracy (see the listing above).
ad_tda_kde_5.60.5_n5_nn1_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n5_nn1_cf0[["overall"]][1]
# Per-class statistics for the positive class.
ad_tda_kde_5.60.5_n5_nn1_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##            0.8651564            0.3796769            0.8147302 
##       Neg Pred Value            Precision               Recall 
##            0.4717380            0.8147302            0.8651564 
##                   F1           Prevalence       Detection Rate 
##            0.8391864            0.7592138            0.6568387 
## Detection Prevalence    Balanced Accuracy 
##            0.8062039            0.6224166
# Elements 5 to 7 are Precision, Recall and F1 in the listing above.
ad_tda_kde_5.60.5_n5_nn1_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n5_nn1_cf0[["byClass"]][5:7]

###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers

### 3-fold diff

# Fold-by-fold accuracy difference: ad_nn1_fit_re minus the TDA-assisted
# resample accuracies. A negative entry means the TDA-assisted fit scored
# higher on that fold.
# NOTE(review): ad_nn1_fit_re is defined outside this section; presumably the
# non-TDA NN resample accuracies, confirm.
diff_tda_kde_5.60.5_nn1_n5_3_fold <-
  ad_nn1_fit_re - ad_tda_kde_5.60.5_n5_nn1_fit_re
diff_tda_kde_5.60.5_nn1_n5_3_fold
##      Accuracy
## 1 -0.06017030
## 2 -0.02306223
## 3 -0.06269886
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# Run on the 3-fold accuracy differences (coerced to a matrix) with the bounds
# -0.01 and 0.01, presumably the region of practical equivalence. The result
# holds probLeft, probRope and probRight.
bst_tda_kde_5.60.5_nn1.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nn1_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_nn1.n5_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of the sign test's left probability to its right probability; it is
# Inf when the right probability is 0, as in the recorded run.
bst_tda_kde_5.60.5_nn1.n5_3_fold_odds.left <-
  bst_tda_kde_5.60.5_nn1.n5_3_fold[["probLeft"]] /
  bst_tda_kde_5.60.5_nn1.n5_3_fold[["probRight"]]
bst_tda_kde_5.60.5_nn1.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Same differences and bounds as the sign test above.
bsr_tda_kde_5.60.5_nn1.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nn1_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_nn1.n5_3_fold
## $winLeft
## [1] 0.9916667
## 
## $winRope
## [1] 0.008333333
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# Correlated Bayesian t-test on the 3-fold differences with bounds
# -0.01 / 0.01.
# NOTE(review): the second argument (0.1) is presumably the assumed
# correlation between folds; confirm against the function definition.
bct_tda_kde_5.60.5_nn1.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nn1_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_nn1.n5_3_fold
## $left
## [1] 0.9396975
## 
## $rope
## [1] 0.03123185
## 
## $right
## [1] 0.02907065
# Rope Plot
# Plot the ROPE analysis of the fold-wise differences for the
# [-0.01, 0.01] interval.
plot(
  rope(diff_tda_kde_5.60.5_nn1_n5_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.60.5_nn1.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nn1_n5_3_fold))
#bf_tda_kde_5.60.5_nn1.n5_3_fold

#t_test
# One-sample t-test of the fold-wise differences against a mean of zero.
t.test(x = as.matrix(diff_tda_kde_5.60.5_nn1_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.60.5_nn1_n5_3_fold)
## t = -3.7969, df = 2, p-value = 0.06289
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.103767627  0.006480036
## sample estimates:
##  mean of x 
## -0.0486438
### Test set diff
# Single-value difference in test-set accuracy: nn1_cf_ov_acc minus the
# TDA-assisted accuracy. A positive value means the TDA-assisted model scored
# lower on the test set.
# NOTE(review): nn1_cf_ov_acc is defined outside this section; presumably the
# non-TDA NN test accuracy, confirm.
diff_tda_kde_5.60.5_nn1.n5_test <-
  nn1_cf_ov_acc - ad_tda_kde_5.60.5_n5_nn1_cf0_ov_acc
diff_tda_kde_5.60.5_nn1.n5_test
##   Accuracy 
## 0.05681818
## Bayesian Tests Test set diff

# Bayesian Sign Test
# Same test as for the 3-fold differences, applied here to the single
# test-set accuracy difference.
bst_tda_kde_5.60.5_nn1.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nn1.n5_test),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_nn1.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# Left probability divided by right probability from the test-set sign test.
bst_tda_kde_5.60.5_nn1.n5_test_odds.left <-
  bst_tda_kde_5.60.5_nn1.n5_test[["probLeft"]] /
  bst_tda_kde_5.60.5_nn1.n5_test[["probRight"]]
bst_tda_kde_5.60.5_nn1.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
# Signed-rank test on the n5 test-set accuracy difference with bounds
# -0.01 / 0.01.
# Fixed a copy-paste slip: this n5 section previously read
# diff_..._nn1.n4_test and stored and printed the result as
# bsr_..._nn1.n4_test, so the n5 test was never run.
# NOTE(review): the console output recorded below this call was produced from
# the n4 inputs; re-run the script to refresh it.
bsr_tda_kde_5.60.5_nn1.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nn1.n5_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_nn1.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# Correlated Bayesian t-test on the single test-set difference. The recorded
# run returns NA for left, rope and right, presumably because the input has
# only one observation.
bct_tda_kde_5.60.5_nn1.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nn1.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_nn1.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_nn1.n5_test,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.60.5_nn1.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nn1.n5_test))
#bf_tda_kde_5.60.5_nn1.n5_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_nn1.n5_test)) 


##Logistic Regression 

# Baseline (non-TDA) logistic regression: a binomial GLM fitted through
# caret::train on the one-hot encoded training data. Resampling follows
# fitControl and models are scored by Accuracy.
adultLrFit <- train(
  as.factor(adult_df1) ~ .,
  data = adult.one_hot_df4Train,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Show the resampling summary of the baseline logistic-regression fit.
adultLrFit
## Generalized Linear Model 
## 
## 22793 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 15195, 15196, 15195 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.8510945  0.5676792
# Per-fold resampling results of the baseline logistic regression.
adultLrFit[["resample"]]
##    Accuracy     Kappa Resample
## 1 0.8496973 0.5632382    Fold1
## 2 0.8482296 0.5620409    Fold2
## 3 0.8553567 0.5777586    Fold3
# Keep only the Accuracy column (column 1) as a one-column data frame.
ad_lr_fit_re <- adultLrFit[["resample"]][1]

# Coefficient table and deviance summary of the fitted GLM.
summary(object = adultLrFit)
## 
## Call:
## NULL
## 
## Coefficients: (9 not defined because of singularities)
##                                  Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)                    -1.023e+13  6.477e+12   -1.580 0.114062    
## V1                              2.877e-02  1.982e-03   14.517  < 2e-16 ***
## V2..                            1.023e+13  6.477e+12    1.580 0.114062    
## V2.Federal.gov                  1.023e+13  6.477e+12    1.580 0.114062    
## V2.Local.gov                    1.023e+13  6.477e+12    1.580 0.114062    
## V2.Never.worked                -4.493e+15  6.477e+12 -693.776  < 2e-16 ***
## V2.Private                      1.023e+13  6.477e+12    1.580 0.114062    
## V2.Self.emp.inc                 1.023e+13  6.477e+12    1.580 0.114062    
## V2.Self.emp.not.inc             1.023e+13  6.477e+12    1.580 0.114062    
## V2.State.gov                    1.023e+13  6.477e+12    1.580 0.114062    
## V2.Without.pay                  1.023e+13  6.477e+12    1.580 0.114062    
## V3                              6.829e-07  2.062e-07    3.311 0.000929 ***
## V4.10th                        -1.171e+00  1.825e-01   -6.415 1.41e-10 ***
## V4.11th                        -1.000e+00  1.819e-01   -5.498 3.83e-08 ***
## V4.12th                        -7.853e-01  2.752e-01   -2.853 0.004333 ** 
## V4.1st.4th                     -1.871e+00  6.099e-01   -3.067 0.002162 ** 
## V4.5th.6th                     -1.244e+00  3.324e-01   -3.743 0.000182 ***
## V4.7th.8th                     -1.593e+00  2.140e-01   -7.447 9.58e-14 ***
## V4.9th                         -1.634e+00  2.940e-01   -5.558 2.73e-08 ***
## V4.Assoc.acdm                   2.427e-01  1.188e-01    2.043 0.041024 *  
## V4.Assoc.voc                    2.679e-01  1.016e-01    2.637 0.008373 ** 
## V4.Bachelors                    7.912e-01  6.671e-02   11.861  < 2e-16 ***
## V4.Doctorate                    2.068e+00  1.960e-01   10.554  < 2e-16 ***
## V4.HS.grad                     -3.188e-01  6.025e-02   -5.291 1.21e-07 ***
## V4.Masters                      1.203e+00  9.717e-02   12.381  < 2e-16 ***
## V4.Preschool                   -2.478e+01  4.565e+04   -0.001 0.999567    
## V4.Prof.school                  1.768e+00  1.640e-01   10.778  < 2e-16 ***
## V4.Some.college                        NA         NA       NA       NA    
## V5                                     NA         NA       NA       NA    
## V6.Divorced                    -2.067e-01  1.836e-01   -1.126 0.260179    
## V6.Married.AF.spouse            2.268e+00  6.704e-01    3.383 0.000716 ***
## V6.Married.civ.spouse           2.016e+00  3.637e-01    5.543 2.97e-08 ***
## V6.Married.spouse.absent       -2.659e-01  3.270e-01   -0.813 0.416064    
## V6.Never.married               -5.888e-01  1.902e-01   -3.096 0.001963 ** 
## V6.Separated                   -9.858e-02  2.441e-01   -0.404 0.686323    
## V6.Widowed                             NA         NA       NA       NA    
## V7..                                   NA         NA       NA       NA    
## V7.Adm.clerical                 2.023e-01  1.192e-01    1.697 0.089726 .  
## V7.Armed.Forces                -7.939e-01  1.624e+00   -0.489 0.624962    
## V7.Craft.repair                 2.979e-01  1.019e-01    2.923 0.003466 ** 
## V7.Exec.managerial              1.007e+00  1.044e-01    9.650  < 2e-16 ***
## V7.Farming.fishing             -8.628e-01  1.757e-01   -4.910 9.12e-07 ***
## V7.Handlers.cleaners           -5.935e-01  1.771e-01   -3.351 0.000806 ***
## V7.Machine.op.inspct           -2.407e-02  1.274e-01   -0.189 0.850211    
## V7.Other.service               -6.090e-01  1.490e-01   -4.087 4.38e-05 ***
## V7.Priv.house.serv             -3.405e+00  1.946e+00   -1.750 0.080130 .  
## V7.Prof.specialty               6.747e-01  1.122e-01    6.012 1.83e-09 ***
## V7.Protective.serv              7.220e-01  1.550e-01    4.658 3.20e-06 ***
## V7.Sales                        4.921e-01  1.077e-01    4.570 4.88e-06 ***
## V7.Tech.support                 9.149e-01  1.416e-01    6.461 1.04e-10 ***
## V7.Transport.moving                    NA         NA       NA       NA    
## V8.Husband                     -1.421e+00  1.220e-01  -11.642  < 2e-16 ***
## V8.Not.in.family               -9.011e-01  3.368e-01   -2.675 0.007467 ** 
## V8.Other.relative              -1.859e+00  3.008e-01   -6.181 6.38e-10 ***
## V8.Own.child                   -2.126e+00  3.330e-01   -6.383 1.74e-10 ***
## V8.Unmarried                   -1.056e+00  3.482e-01   -3.033 0.002418 ** 
## V8.Wife                                NA         NA       NA       NA    
## V9.Amer.Indian.Eskimo          -7.414e-01  2.697e-01   -2.749 0.005977 ** 
## V9.Asian.Pac.Islander           4.763e-02  1.879e-01    0.253 0.799923    
## V9.Black                       -1.683e-01  9.175e-02   -1.835 0.066569 .  
## V9.Other                       -3.401e-01  3.230e-01   -1.053 0.292291    
## V9.White                               NA         NA       NA       NA    
## V10.Female                     -8.625e-01  9.424e-02   -9.152  < 2e-16 ***
## V10.Male                               NA         NA       NA       NA    
## V11                             3.164e-04  1.239e-05   25.527  < 2e-16 ***
## V12                             6.201e-04  4.394e-05   14.113  < 2e-16 ***
## V13                             2.963e-02  1.935e-03   15.312  < 2e-16 ***
## V14..                          -4.819e-01  7.233e-01   -0.666 0.505218    
## V14.Cambodia                    1.147e+00  1.099e+00    1.043 0.296775    
## V14.Canada                     -1.529e-01  7.754e-01   -0.197 0.843649    
## V14.China                      -8.039e-01  8.419e-01   -0.955 0.339658    
## V14.Columbia                   -1.342e+00  1.150e+00   -1.168 0.242916    
## V14.Cuba                        2.932e-01  7.928e-01    0.370 0.711537    
## V14.Dominican.Republic         -1.810e+00  1.268e+00   -1.427 0.153437    
## V14.Ecuador                    -2.579e-01  1.069e+00   -0.241 0.809409    
## V14.El.Salvador                -8.031e-01  9.135e-01   -0.879 0.379330    
## V14.England                     1.845e-01  7.991e-01    0.231 0.817367    
## V14.France                      2.495e-01  1.007e+00    0.248 0.804303    
## V14.Germany                     1.113e-01  7.687e-01    0.145 0.884870    
## V14.Greece                     -1.073e+00  9.239e-01   -1.161 0.245646    
## V14.Guatemala                   2.472e-01  1.066e+00    0.232 0.816614    
## V14.Haiti                      -1.946e+00  1.419e+00   -1.371 0.170353    
## V14.Holand.Netherlands         -2.264e+01  3.459e+05    0.000 0.999948    
## V14.Honduras                   -1.543e+00  2.583e+00   -0.597 0.550296    
## V14.Hong                        3.604e-02  1.018e+00    0.035 0.971757    
## V14.Hungary                    -4.154e-01  1.208e+00   -0.344 0.731004    
## V14.India                      -7.107e-01  8.101e-01   -0.877 0.380320    
## V14.Iran                       -1.027e-01  8.592e-01   -0.120 0.904856    
## V14.Ireland                     4.688e-01  1.051e+00    0.446 0.655675    
## V14.Italy                       3.216e-01  8.264e-01    0.389 0.697120    
## V14.Jamaica                    -1.134e+00  9.678e-01   -1.172 0.241185    
## V14.Japan                       3.956e-01  8.559e-01    0.462 0.643909    
## V14.Laos                       -1.050e+00  1.320e+00   -0.795 0.426381    
## V14.Mexico                     -6.909e-01  7.463e-01   -0.926 0.354542    
## V14.Nicaragua                  -2.445e+01  5.937e+04    0.000 0.999671    
## V14.Outlying.US.Guam.USVI.etc. -2.389e+01  9.718e+04    0.000 0.999804    
## V14.Peru                       -1.208e+00  1.440e+00   -0.839 0.401520    
## V14.Philippines                 1.230e-02  7.716e-01    0.016 0.987281    
## V14.Poland                     -4.460e-01  8.546e-01   -0.522 0.601727    
## V14.Portugal                   -2.091e-01  1.041e+00   -0.201 0.840857    
## V14.Puerto.Rico                -1.305e+00  8.687e-01   -1.502 0.133081    
## V14.Scotland                   -1.326e-01  1.076e+00   -0.123 0.901896    
## V14.South                      -1.915e+00  9.039e-01   -2.118 0.034147 *  
## V14.Taiwan                     -4.376e-01  9.296e-01   -0.471 0.637811    
## V14.Thailand                   -3.764e-01  1.237e+00   -0.304 0.761007    
## V14.Trinadad.Tobago            -4.231e-01  1.156e+00   -0.366 0.714328    
## V14.United.States              -1.082e-01  7.053e-01   -0.153 0.878016    
## V14.Vietnam                    -1.432e+00  9.958e-01   -1.438 0.150307    
## V14.Yugoslavia                         NA         NA       NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 25165  on 22792  degrees of freedom
## Residual deviance: 14343  on 22693  degrees of freedom
## AIC: 14543
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the baseline logistic regression, limited to
# the 25 highest-ranked features.
vip(adultLrFit, num_features = 25) +
  ggtitle("non-TDA-Assisted LR")

# Predict the outcome for the held-out test rows with the model fitted on the
# training data.
predictions <- predict(
  adultLrFit,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of predicted vs. observed classes on the test data.
lr_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test[["adult_df1"]])
)
lr_cf
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6923   940
##      >50K     493  1412
##                                           
##                Accuracy : 0.8533          
##                  95% CI : (0.8461, 0.8603)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5709          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9335          
##             Specificity : 0.6003          
##          Pos Pred Value : 0.8805          
##          Neg Pred Value : 0.7412          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7087          
##    Detection Prevalence : 0.8050          
##       Balanced Accuracy : 0.7669          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics of the baseline confusion matrix.
lr_cf$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.532965e-01   5.709073e-01   8.461236e-01   8.602580e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  7.606349e-117   4.844546e-32
# Keep the accuracy for the later model comparison. Selected by name so the
# extraction does not depend on the position of the entry in the vector.
lr_cf_ov_acc <- lr_cf$overall["Accuracy"]
# Per-class statistics; the printed matrix reports ' <=50K' as the positive
# class, so these values describe that class.
lr_cf$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9335221            0.6003401            0.8804528 
##       Neg Pred Value            Precision               Recall 
##            0.7412073            0.8804528            0.9335221 
##                   F1           Prevalence       Detection Rate 
##            0.9062111            0.7592138            0.7087428 
## Detection Prevalence    Balanced Accuracy 
##            0.8049754            0.7669311
# Precision, recall and F1, again picked by name instead of by index 5:7.
lr_cf_pre_rec_f1 <- lr_cf$byClass[c("Precision", "Recall", "F1")]


##With TDA PCA filter 5 intervals, 50% overlap, 5 bins 
##Node1
# NOTE(review): the header says 50% overlap while the object names in this
# section carry "5.60.5" -- confirm which overlap was actually used.

# Logistic regression on the node-1 subset, fitted through train() so the
# per-fold resampling results are available alongside the final glm.
# A single train() call is sufficient: a separate glm() fit assigned to the
# same name would be overwritten here before anything reads it.
Adult_TDA_PC_5.60.5_n1_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n1.vec,
  family = "binomial",
  method = "glm",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the fitted node-1 model with its cross-validated accuracy and kappa.
Adult_TDA_PC_5.60.5_n1_LrFit0
## Generalized Linear Model 
## 
## 6560 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 4373, 4374, 4373 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.8458861  0.1724515
# Per-fold resampling results.
Adult_TDA_PC_5.60.5_n1_LrFit0$resample
##    Accuracy      Kappa Resample
## 1 0.7869227 0.09940988    Fold1
## 2 0.8586459 0.18178118    Fold2
## 3 0.8920896 0.23616344    Fold3
# Keep the per-fold accuracy as a one-column data frame. The column is
# selected by name so the result does not depend on column order.
ad_tda_pc_5.60.5_n1_lr_fit_re <-
  Adult_TDA_PC_5.60.5_n1_LrFit0$resample["Accuracy"]

# Coefficient table of the fitted model.
summary(Adult_TDA_PC_5.60.5_n1_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (18 not defined because of singularities)
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                    -2.456e+13  1.392e+13  -1.764 0.077701 .  
## V1                             -4.313e-02  4.837e-03  -8.916  < 2e-16 ***
## V2..                            1.632e+00  7.185e-01   2.271 0.023121 *  
## V2.Federal.gov                  8.587e-02  3.110e-01   0.276 0.782470    
## V2.Local.gov                   -4.136e-01  2.410e-01  -1.717 0.086049 .  
## V2.Never.worked                        NA         NA      NA       NA    
## V2.Private                      1.231e+00  2.188e-01   5.624 1.86e-08 ***
## V2.Self.emp.inc                -7.838e-01  2.310e-01  -3.393 0.000691 ***
## V2.Self.emp.not.inc            -1.234e+00  2.226e-01  -5.543 2.98e-08 ***
## V2.State.gov                           NA         NA      NA       NA    
## V2.Without.pay                         NA         NA      NA       NA    
## V3                              1.258e-06  5.262e-07   2.390 0.016853 *  
## V4.10th                         7.061e-01  9.483e-01   0.745 0.456494    
## V4.11th                         2.465e+01  7.928e+04   0.000 0.999752    
## V4.12th                        -1.235e-01  1.193e+00  -0.104 0.917520    
## V4.1st.4th                      2.282e+01  2.834e+05   0.000 0.999936    
## V4.5th.6th                      2.476e+01  1.585e+05   0.000 0.999875    
## V4.7th.8th                      9.693e-01  6.177e-01   1.569 0.116582    
## V4.9th                          4.559e-02  8.163e-01   0.056 0.955460    
## V4.Assoc.acdm                  -6.941e-01  3.288e-01  -2.111 0.034778 *  
## V4.Assoc.voc                   -7.554e-01  2.730e-01  -2.767 0.005661 ** 
## V4.Bachelors                   -1.211e+00  1.778e-01  -6.813 9.55e-12 ***
## V4.Doctorate                   -9.051e-01  2.759e-01  -3.281 0.001036 ** 
## V4.HS.grad                      1.309e-01  1.997e-01   0.655 0.512250    
## V4.Masters                     -1.181e+00  2.049e-01  -5.764 8.22e-09 ***
## V4.Preschool                           NA         NA      NA       NA    
## V4.Prof.school                 -4.743e-01  2.596e-01  -1.827 0.067680 .  
## V4.Some.college                        NA         NA      NA       NA    
## V5                                     NA         NA      NA       NA    
## V6.Divorced                     2.456e+13  1.392e+13   1.764 0.077701 .  
## V6.Married.AF.spouse            2.456e+13  1.392e+13   1.764 0.077701 .  
## V6.Married.civ.spouse           2.456e+13  1.392e+13   1.764 0.077701 .  
## V6.Married.spouse.absent        2.456e+13  1.392e+13   1.764 0.077701 .  
## V6.Never.married                2.456e+13  1.392e+13   1.764 0.077701 .  
## V6.Separated                    2.456e+13  1.392e+13   1.764 0.077701 .  
## V6.Widowed                      2.456e+13  1.392e+13   1.764 0.077701 .  
## V7..                                   NA         NA      NA       NA    
## V7.Adm.clerical                 1.935e+00  8.107e-01   2.387 0.016982 *  
## V7.Armed.Forces                 2.422e+01  5.165e+05   0.000 0.999963    
## V7.Craft.repair                -3.594e-01  3.351e-01  -1.073 0.283478    
## V7.Exec.managerial             -2.004e-01  3.206e-01  -0.625 0.531873    
## V7.Farming.fishing             -1.233e+00  3.453e-01  -3.571 0.000355 ***
## V7.Handlers.cleaners            2.362e+01  6.352e+04   0.000 0.999703    
## V7.Machine.op.inspct            1.127e+00  8.224e-01   1.371 0.170500    
## V7.Other.service                5.935e-01  8.365e-01   0.709 0.478035    
## V7.Priv.house.serv                     NA         NA      NA       NA    
## V7.Prof.specialty              -4.022e-01  3.327e-01  -1.209 0.226602    
## V7.Protective.serv              4.992e-01  4.674e-01   1.068 0.285536    
## V7.Sales                        3.841e-01  3.411e-01   1.126 0.260134    
## V7.Tech.support                 1.094e+00  6.234e-01   1.755 0.079280 .  
## V7.Transport.moving                    NA         NA      NA       NA    
## V8.Husband                     -2.370e+01  5.521e+05   0.000 0.999966    
## V8.Not.in.family                3.308e+00  6.479e+05   0.000 0.999996    
## V8.Other.relative               1.351e+00  5.988e+05   0.000 0.999998    
## V8.Own.child                    1.028e+00  7.591e+05   0.000 0.999999    
## V8.Unmarried                    2.796e+00  6.480e+05   0.000 0.999997    
## V8.Wife                                NA         NA      NA       NA    
## V9.Amer.Indian.Eskimo           2.450e+01  1.198e+05   0.000 0.999837    
## V9.Asian.Pac.Islander           1.134e+00  5.608e-01   2.022 0.043224 *  
## V9.Black                        2.364e+00  7.597e-01   3.111 0.001862 ** 
## V9.Other                        2.051e-01  1.147e+00   0.179 0.858058    
## V9.White                               NA         NA      NA       NA    
## V10.Female                      1.992e+00  5.550e+05   0.000 0.999997    
## V10.Male                               NA         NA      NA       NA    
## V11                             2.186e-04  2.559e-05   8.545  < 2e-16 ***
## V12                             5.306e-04  9.479e-05   5.597 2.18e-08 ***
## V13                            -3.835e-02  3.793e-03 -10.112  < 2e-16 ***
## V14..                           4.369e-01  1.284e+00   0.340 0.733624    
## V14.Cambodia                    2.410e+01  2.258e+05   0.000 0.999915    
## V14.Canada                      2.272e-01  1.380e+00   0.165 0.869231    
## V14.China                       3.653e-01  1.730e+00   0.211 0.832771    
## V14.Columbia                   -1.012e+00  1.993e+00  -0.508 0.611573    
## V14.Cuba                        8.292e-01  1.488e+00   0.557 0.577398    
## V14.Dominican.Republic                 NA         NA      NA       NA    
## V14.Ecuador                     2.415e+01  2.787e+05   0.000 0.999931    
## V14.El.Salvador                 2.500e+01  1.882e+05   0.000 0.999894    
## V14.England                     1.656e+00  1.666e+00   0.994 0.320287    
## V14.France                     -9.396e-01  1.521e+00  -0.618 0.536688    
## V14.Germany                     7.750e-01  1.485e+00   0.522 0.601816    
## V14.Greece                     -1.693e+00  1.398e+00  -1.210 0.226109    
## V14.Guatemala                   2.169e+01  5.312e+05   0.000 0.999967    
## V14.Haiti                              NA         NA      NA       NA    
## V14.Holand.Netherlands                 NA         NA      NA       NA    
## V14.Honduras                   -1.273e+00  5.331e+05   0.000 0.999998    
## V14.Hong                       -2.072e+00  1.801e+00  -1.150 0.249956    
## V14.Hungary                    -5.727e-01  1.819e+00  -0.315 0.752907    
## V14.India                      -1.720e+00  1.405e+00  -1.224 0.221006    
## V14.Iran                       -1.324e+00  1.384e+00  -0.957 0.338707    
## V14.Ireland                     2.515e+01  2.363e+05   0.000 0.999915    
## V14.Italy                       2.553e+01  1.080e+05   0.000 0.999811    
## V14.Jamaica                     2.181e+01  2.216e+05   0.000 0.999921    
## V14.Japan                      -1.285e+00  1.490e+00  -0.862 0.388419    
## V14.Laos                        2.246e+01  4.943e+05   0.000 0.999964    
## V14.Mexico                      1.498e+00  1.671e+00   0.897 0.369948    
## V14.Nicaragua                   2.371e+01  5.176e+05   0.000 0.999963    
## V14.Outlying.US.Guam.USVI.etc.         NA         NA      NA       NA    
## V14.Peru                        2.338e+01  3.689e+05   0.000 0.999949    
## V14.Philippines                -4.424e-01  1.483e+00  -0.298 0.765530    
## V14.Poland                      2.481e+01  1.578e+05   0.000 0.999875    
## V14.Portugal                    2.603e+01  3.673e+05   0.000 0.999943    
## V14.Puerto.Rico                 2.581e+01  2.089e+05   0.000 0.999901    
## V14.Scotland                    2.722e+01  2.884e+05   0.000 0.999925    
## V14.South                      -9.111e-01  1.561e+00  -0.584 0.559482    
## V14.Taiwan                     -3.596e-01  1.584e+00  -0.227 0.820348    
## V14.Thailand                    2.479e+01  3.462e+05   0.000 0.999943    
## V14.Trinadad.Tobago                    NA         NA      NA       NA    
## V14.United.States               4.263e-01  1.231e+00   0.346 0.729203    
## V14.Vietnam                     2.250e+01  3.359e+05   0.000 0.999947    
## V14.Yugoslavia                         NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 4412.6  on 6559  degrees of freedom
## Residual deviance: 3007.7  on 6469  degrees of freedom
## AIC: 3189.7
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot (top 50 features) for the node-1 model.
# The title spelling now matches the baseline plot ("TDA-Assisted").
vip(Adult_TDA_PC_5.60.5_n1_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_PCA_5.60.5_n1_Lr1Fit TDA-Assisted LR")

# Predict outcome using Adult_TDA_PC_5.60.5_n1_LrFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.60.5_n1_LrFit0, newdata = adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.60.5_n1_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n1_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K     54    27
##      >50K    7362  2325
##                                           
##                Accuracy : 0.2436          
##                  95% CI : (0.2351, 0.2522)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : -0.002          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.007282        
##             Specificity : 0.988520        
##          Pos Pred Value : 0.666667        
##          Neg Pred Value : 0.240012        
##              Prevalence : 0.759214        
##          Detection Rate : 0.005528        
##    Detection Prevalence : 0.008292        
##       Balanced Accuracy : 0.497901        
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics of the node-1 confusion matrix. The matrix
# itself is printed once, right after it is created.
ad_tda_pc_5.60.5_n1_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##    0.243550369   -0.002033172    0.235064668    0.252189231    0.759213759 
## AccuracyPValue  McnemarPValue 
##    1.000000000    0.000000000
# Keep the accuracy for the later model comparison. Selected by name so the
# extraction does not depend on the position of the entry in the vector.
ad_tda_pc_5.60.5_n1_lr_cf0_ov_acc <-
  ad_tda_pc_5.60.5_n1_lr_cf0$overall["Accuracy"]
# Per-class statistics; the printed matrix reports ' <=50K' as the positive
# class, so these values describe that class.
ad_tda_pc_5.60.5_n1_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##          0.007281553          0.988520408          0.666666667 
##       Neg Pred Value            Precision               Recall 
##          0.240012388          0.666666667          0.007281553 
##                   F1           Prevalence       Detection Rate 
##          0.014405762          0.759213759          0.005528256 
## Detection Prevalence    Balanced Accuracy 
##          0.008292383          0.497900981
# Precision, recall and F1, picked by name instead of by index 5:7.
ad_tda_pc_5.60.5_n1_lr_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n1_lr_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers

### 3-fold diff

# Fold-by-fold accuracy difference: ad_lr_fit_re (defined earlier in the file,
# presumably the non-TDA model's per-fold accuracy) minus the node-1 model's.
# NOTE(review): the node-1 model is resampled on its own 6560-row subset, so
# fold i here is presumably not the same split as fold i of the other model --
# confirm that treating the folds as paired is intended.
diff_tda_pca_5.60.5_lr_n1_3_fold <-
  ad_lr_fit_re - ad_tda_pc_5.60.5_n1_lr_fit_re
diff_tda_pca_5.60.5_lr_n1_3_fold
##      Accuracy
## 1  0.06277456
## 2 -0.01041636
## 3 -0.03673295
## Bayesian Tests 3-fold diff

# Bayesian Sign Test on the fold differences, with -0.01 and 0.01 passed as
# the two bounds (the same interval is used for the rope plot further down).
bst_tda_pca_5.60.5_lr.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_lr_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_lr.n1_3_fold
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.25
# Odds Left Bayesian Sign Test: ratio of the sign test's probLeft to probRight.
bst_tda_pca_5.60.5_lr.n1_3_fold_odds.left <- with(
  bst_tda_pca_5.60.5_lr.n1_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.60.5_lr.n1_3_fold_odds.left
## [1] 2
# Bayesian Signed Rank Test on the same fold differences and bounds.
bsr_tda_pca_5.60.5_lr.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_lr_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_lr.n1_3_fold
## $winLeft
## [1] 0.4649
## 
## $winRope
## [1] 0.06286667
## 
## $winRight
## [1] 0.4722333
# Bayesian Correlated Test
# NOTE(review): the second argument (0.1) is presumably the assumed
# correlation between folds; for k-fold CV that is conventionally 1/k, i.e.
# 1/3 for the 3-fold resampling used here -- confirm against the function.
bct_tda_pca_5.60.5_lr.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_lr_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_lr.n1_3_fold
## $left
## [1] 0.3507095
## 
## $rope
## [1] 0.1983366
## 
## $right
## [1] 0.4509539
# Rope Plot: plot the rope() result for the fold differences, using the
# interval c(-0.01, 0.01).
plot(rope(diff_tda_pca_5.60.5_lr_n1_3_fold,c(-0.01,0.01)))

# BayesFactor t-test on the fold differences (currently disabled).
#bf_tda_pca_5.60.5_lr.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_lr_n1_3_fold))
#bf_tda_pca_5.60.5_lr.n1_3_fold

# One-sample t-test of the fold differences against a mean of 0.
# The printed "data:" line echoes this exact argument expression, so the call
# is left as written to keep the recorded output reproducible.
t.test(as.matrix(diff_tda_pca_5.60.5_lr_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.60.5_lr_n1_3_fold)
## t = 0.17496, df = 2, p-value = 0.8772
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1228762  0.1332930
## sample estimates:
##   mean of x 
## 0.005208417
### Test set diff
# Single test-set accuracy difference: lr_cf_ov_acc minus the node-1 model's
# test accuracy. The result is one named number, not a vector of folds.
diff_tda_pca_5.60.5_lr.n1_test <-
  lr_cf_ov_acc - ad_tda_pc_5.60.5_n1_lr_cf0_ov_acc
diff_tda_pca_5.60.5_lr.n1_test
##  Accuracy 
## 0.6097461
## Bayesian Tests Test set diff

# Bayesian Sign Test on that single difference, with -0.01 and 0.01 as bounds.
bst_tda_pca_5.60.5_lr.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_lr.n1_test),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_lr.n1_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: ratio of the sign test's probLeft to probRight.
bst_tda_pca_5.60.5_lr.n1_test_odds.left <- with(
  bst_tda_pca_5.60.5_lr.n1_test,
  probLeft / probRight
)
bst_tda_pca_5.60.5_lr.n1_test_odds.left
## [1] 0
# Bayesian Signed Rank Test on the single test-set difference.
bsr_tda_pca_5.60.5_lr.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_lr.n1_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_lr.n1_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1614667
## 
## $winRight
## [1] 0.8385333
# Bayesian Correlated Test
# The input is a single value, and the recorded result is NA for all three
# regions.
bct_tda_pca_5.60.5_lr.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_lr.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_lr.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_lr.n1_test)))

#BayesFactor
#bf_tda_pca_5.60.5_lr.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_lr.n1_test)) #bf_tda_pca_5.60.5_lr.n1_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_lr.n1_test))

##With TDA PCA filter 5 intervals, 50% overlap, 5 bins 
##Node2
# NOTE(review): the header says 50% overlap while the object names in this
# section carry "5.60.5" -- confirm which overlap was actually used.

# Logistic regression on the node-2 subset, fitted through train() with the
# shared fitControl resampling settings and accuracy as the summary metric.
Adult_TDA_PC_5.60.5_n2_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n2.vec,
  family = "binomial",
  method = "glm",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the fitted node-2 model with its cross-validated accuracy and kappa.
Adult_TDA_PC_5.60.5_n2_LrFit0
## Generalized Linear Model 
## 
## 13933 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 9289, 9288, 9289 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.7417642  0.4839514
# Per-fold resampling results.
Adult_TDA_PC_5.60.5_n2_LrFit0$resample
##    Accuracy     Kappa Resample
## 1 0.7444014 0.4891124    Fold1
## 2 0.7405813 0.4814669    Fold2
## 3 0.7403101 0.4812748    Fold3
# Keep the per-fold accuracy as a one-column data frame. The column is
# selected by name so the result does not depend on column order.
ad_tda_pc_5.60.5_n2_lr_fit_re <-
  Adult_TDA_PC_5.60.5_n2_LrFit0$resample["Accuracy"]

# Coefficient table of the fitted model.
summary(Adult_TDA_PC_5.60.5_n2_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (11 not defined because of singularities)
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                    -2.540e+12  7.197e+12  -0.353 0.724140    
## V1                              1.791e-02  1.932e-03   9.270  < 2e-16 ***
## V2..                            2.540e+12  7.197e+12   0.353 0.724140    
## V2.Federal.gov                  2.540e+12  7.197e+12   0.353 0.724140    
## V2.Local.gov                    2.540e+12  7.197e+12   0.353 0.724140    
## V2.Never.worked                        NA         NA      NA       NA    
## V2.Private                      2.540e+12  7.197e+12   0.353 0.724140    
## V2.Self.emp.inc                 2.540e+12  7.197e+12   0.353 0.724140    
## V2.Self.emp.not.inc             2.540e+12  7.197e+12   0.353 0.724140    
## V2.State.gov                    2.540e+12  7.197e+12   0.353 0.724140    
## V2.Without.pay                  2.540e+12  7.197e+12   0.353 0.724140    
## V3                              8.402e-07  2.057e-07   4.084 4.42e-05 ***
## V4.10th                        -9.815e-01  1.687e-01  -5.817 6.00e-09 ***
## V4.11th                        -1.205e+00  1.842e-01  -6.541 6.12e-11 ***
## V4.12th                        -4.500e-01  2.637e-01  -1.706 0.087949 .  
## V4.1st.4th                     -9.174e-01  5.030e-01  -1.824 0.068184 .  
## V4.5th.6th                     -1.100e+00  3.354e-01  -3.280 0.001040 ** 
## V4.7th.8th                     -1.545e+00  1.960e-01  -7.882 3.22e-15 ***
## V4.9th                         -1.463e+00  2.653e-01  -5.514 3.50e-08 ***
## V4.Assoc.acdm                   1.239e-01  1.181e-01   1.049 0.294054    
## V4.Assoc.voc                    8.890e-02  9.924e-02   0.896 0.370377    
## V4.Bachelors                    6.616e-01  6.597e-02  10.028  < 2e-16 ***
## V4.Doctorate                    1.196e+00  1.798e-01   6.649 2.96e-11 ***
## V4.HS.grad                     -3.164e-01  5.747e-02  -5.506 3.68e-08 ***
## V4.Masters                      9.190e-01  9.735e-02   9.441  < 2e-16 ***
## V4.Preschool                   -2.569e+01  2.474e+05   0.000 0.999917    
## V4.Prof.school                  1.122e+00  1.570e-01   7.144 9.08e-13 ***
## V4.Some.college                        NA         NA      NA       NA    
## V5                                     NA         NA      NA       NA    
## V6.Divorced                    -4.346e-01  5.937e-01  -0.732 0.464162    
## V6.Married.AF.spouse            6.577e-02  1.223e+00   0.054 0.957114    
## V6.Married.civ.spouse          -1.116e+00  7.637e-01  -1.461 0.144052    
## V6.Married.spouse.absent        1.377e-01  8.402e-01   0.164 0.869811    
## V6.Never.married               -1.517e-02  6.158e-01  -0.025 0.980343    
## V6.Separated                    4.035e-01  8.480e-01   0.476 0.634205    
## V6.Widowed                             NA         NA      NA       NA    
## V7..                                   NA         NA      NA       NA    
## V7.Adm.clerical                 2.935e-01  1.216e-01   2.414 0.015793 *  
## V7.Armed.Forces                -5.696e-01  1.810e+00  -0.315 0.753045    
## V7.Craft.repair                 1.675e-01  8.960e-02   1.869 0.061633 .  
## V7.Exec.managerial              9.282e-01  9.514e-02   9.756  < 2e-16 ***
## V7.Farming.fishing             -6.902e-01  1.476e-01  -4.677 2.92e-06 ***
## V7.Handlers.cleaners           -4.607e-01  1.592e-01  -2.895 0.003796 ** 
## V7.Machine.op.inspct           -4.459e-02  1.144e-01  -0.390 0.696749    
## V7.Other.service               -5.924e-01  1.597e-01  -3.710 0.000207 ***
## V7.Priv.house.serv             -2.805e+01  3.545e+05   0.000 0.999937    
## V7.Prof.specialty               6.286e-01  1.042e-01   6.032 1.62e-09 ***
## V7.Protective.serv              6.409e-01  1.431e-01   4.477 7.56e-06 ***
## V7.Sales                        4.782e-01  9.792e-02   4.884 1.04e-06 ***
## V7.Tech.support                 9.256e-01  1.408e-01   6.573 4.92e-11 ***
## V7.Transport.moving                    NA         NA      NA       NA    
## V8.Husband                     -1.011e+00  7.981e-01  -1.267 0.205243    
## V8.Not.in.family               -6.959e-01  9.340e-01  -0.745 0.456243    
## V8.Other.relative              -9.407e-01  8.818e-01  -1.067 0.286043    
## V8.Own.child                   -4.056e-01  9.281e-01  -0.437 0.662111    
## V8.Unmarried                   -1.216e-02  1.019e+00  -0.012 0.990480    
## V8.Wife                                NA         NA      NA       NA    
## V9.Amer.Indian.Eskimo          -8.232e-01  2.960e-01  -2.781 0.005419 ** 
## V9.Asian.Pac.Islander           2.100e-01  1.929e-01   1.089 0.276199    
## V9.Black                        3.402e-01  1.097e-01   3.102 0.001919 ** 
## V9.Other                       -2.035e-01  3.513e-01  -0.579 0.562422    
## V9.White                               NA         NA      NA       NA    
## V10.Female                      1.919e+00  7.830e-01   2.452 0.014226 *  
## V10.Male                               NA         NA      NA       NA    
## V11                             2.816e-04  1.363e-05  20.664  < 2e-16 ***
## V12                             6.165e-04  4.545e-05  13.565  < 2e-16 ***
## V13                             2.174e-02  1.928e-03  11.278  < 2e-16 ***
## V14..                          -5.315e-01  7.048e-01  -0.754 0.450726    
## V14.Cambodia                    1.620e+00  1.122e+00   1.444 0.148852    
## V14.Canada                      1.161e-01  7.534e-01   0.154 0.877581    
## V14.China                      -1.211e+00  8.141e-01  -1.487 0.136948    
## V14.Columbia                   -2.552e+00  1.135e+00  -2.248 0.024594 *  
## V14.Cuba                        3.775e-01  7.819e-01   0.483 0.629208    
## V14.Dominican.Republic         -2.582e+01  1.127e+05   0.000 0.999817    
## V14.Ecuador                    -4.815e-01  1.013e+00  -0.475 0.634688    
## V14.El.Salvador                -8.691e-01  8.788e-01  -0.989 0.322668    
## V14.England                     1.384e-01  7.961e-01   0.174 0.862013    
## V14.France                     -3.852e-01  9.261e-01  -0.416 0.677463    
## V14.Germany                     2.855e-01  7.514e-01   0.380 0.703941    
## V14.Greece                     -1.523e+00  9.397e-01  -1.621 0.105050    
## V14.Guatemala                  -2.287e+00  1.594e+00  -1.435 0.151394    
## V14.Haiti                      -2.895e-01  1.143e+00  -0.253 0.800133    
## V14.Holand.Netherlands                 NA         NA      NA       NA    
## V14.Honduras                    2.105e+01  3.648e+05   0.000 0.999954    
## V14.Hong                       -3.825e-01  1.026e+00  -0.373 0.709288    
## V14.Hungary                    -3.069e-01  1.203e+00  -0.255 0.798705    
## V14.India                      -9.404e-01  7.647e-01  -1.230 0.218780    
## V14.Iran                       -9.125e-02  8.493e-01  -0.107 0.914433    
## V14.Ireland                     5.973e-01  1.140e+00   0.524 0.600442    
## V14.Italy                       1.792e-01  7.733e-01   0.232 0.816739    
## V14.Jamaica                     1.145e-01  9.339e-01   0.123 0.902383    
## V14.Japan                      -5.800e-01  8.255e-01  -0.703 0.482321    
## V14.Laos                       -1.330e+00  1.409e+00  -0.944 0.345416    
## V14.Mexico                     -6.616e-01  7.311e-01  -0.905 0.365485    
## V14.Nicaragua                  -1.569e+00  1.175e+00  -1.336 0.181675    
## V14.Outlying.US.Guam.USVI.etc. -2.521e+01  3.035e+05   0.000 0.999934    
## V14.Peru                       -9.033e-01  1.138e+00  -0.794 0.427428    
## V14.Philippines                 3.706e-01  7.670e-01   0.483 0.629029    
## V14.Poland                     -2.732e-01  8.224e-01  -0.332 0.739751    
## V14.Portugal                   -7.221e-01  1.082e+00  -0.668 0.504426    
## V14.Puerto.Rico                -8.384e-01  8.555e-01  -0.980 0.327096    
## V14.Scotland                    6.170e-01  1.419e+00   0.435 0.663727    
## V14.South                      -1.078e+00  8.462e-01  -1.274 0.202666    
## V14.Taiwan                     -6.791e-01  8.524e-01  -0.797 0.425635    
## V14.Thailand                   -2.801e-01  1.292e+00  -0.217 0.828354    
## V14.Trinadad.Tobago             1.411e+00  1.628e+00   0.867 0.386101    
## V14.United.States              -1.153e-01  6.871e-01  -0.168 0.866787    
## V14.Vietnam                    -2.121e+00  1.064e+00  -1.994 0.046145 *  
## V14.Yugoslavia                         NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 19311  on 13932  degrees of freedom
## Residual deviance: 14308  on 13835  degrees of freedom
## AIC: 14504
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot (top 50 features) for the node-2 model.
# The title spelling now matches the baseline plot ("TDA-Assisted").
vip(Adult_TDA_PC_5.60.5_n2_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_PCA_5.60.5_n2_Lr1Fit TDA-Assisted LR")

# Predict outcome using Adult_TDA_PC_5.60.5_n2_LrFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.60.5_n2_LrFit0, newdata = adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.60.5_n2_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n2_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   2775   623
##      >50K    4641  1729
##                                          
##                Accuracy : 0.4611         
##                  95% CI : (0.4512, 0.471)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 1              
##                                          
##                   Kappa : 0.069          
##                                          
##  Mcnemar's Test P-Value : <2e-16         
##                                          
##             Sensitivity : 0.3742         
##             Specificity : 0.7351         
##          Pos Pred Value : 0.8167         
##          Neg Pred Value : 0.2714         
##              Prevalence : 0.7592         
##          Detection Rate : 0.2841         
##    Detection Prevalence : 0.3479         
##       Balanced Accuracy : 0.5547         
##                                          
##        'Positive' Class :  <=50K         
## 
# Overall test-set statistics of the node-2 confusion matrix. The matrix
# itself is printed once, right after it is created.
ad_tda_pc_5.60.5_n2_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##     0.46109746     0.06904083     0.45117366     0.47104447     0.75921376 
## AccuracyPValue  McnemarPValue 
##     1.00000000     0.00000000
# Keep the accuracy for the later model comparison. Selected by name so the
# extraction does not depend on the position of the entry in the vector.
ad_tda_pc_5.60.5_n2_lr_cf0_ov_acc <-
  ad_tda_pc_5.60.5_n2_lr_cf0$overall["Accuracy"]
# Per-class statistics; the printed matrix reports ' <=50K' as the positive
# class, so these values describe that class.
ad_tda_pc_5.60.5_n2_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.3741909            0.7351190            0.8166569 
##       Neg Pred Value            Precision               Recall 
##            0.2714286            0.8166569            0.3741909 
##                   F1           Prevalence       Detection Rate 
##            0.5132236            0.7592138            0.2840909 
## Detection Prevalence    Balanced Accuracy 
##            0.3478706            0.5546550
# Precision, recall and F1, picked by name instead of by index 5:7.
ad_tda_pc_5.60.5_n2_lr_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n2_lr_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers

### 3-fold diff

# Fold-by-fold accuracy difference: ad_lr_fit_re (defined earlier in the file,
# presumably the non-TDA model's per-fold accuracy) minus the node-2 model's.
# NOTE(review): the node-2 model is resampled on its own 13933-row subset, so
# fold i here is presumably not the same split as fold i of the other model --
# confirm that treating the folds as paired is intended.
diff_tda_pca_5.60.5_lr_n2_3_fold <-
  ad_lr_fit_re - ad_tda_pc_5.60.5_n2_lr_fit_re
diff_tda_pca_5.60.5_lr_n2_3_fold
##    Accuracy
## 1 0.1052959
## 2 0.1076483
## 3 0.1150466
## Bayesian Tests 3-fold diff

# Bayesian Sign Test on the fold differences, with -0.01 and 0.01 passed as
# the two bounds (the same interval is used for the rope plot further down).
bst_tda_pca_5.60.5_lr.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_lr_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_lr.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test: ratio of the sign test's probLeft to probRight.
bst_tda_pca_5.60.5_lr.n2_3_fold_odds.left <- with(
  bst_tda_pca_5.60.5_lr.n2_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.60.5_lr.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test on the same fold differences and bounds.
bsr_tda_pca_5.60.5_lr.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_lr_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_lr.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.0092
## 
## $winRight
## [1] 0.9908
# Bayesian Correlated Test
# NOTE(review): the second argument (0.1) is presumably the assumed
# correlation between folds; for k-fold CV that is conventionally 1/k, i.e.
# 1/3 for the 3-fold resampling used here -- confirm against the function.
bct_tda_pca_5.60.5_lr.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_lr_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_lr.n2_3_fold
## $left
## [1] 0.0004035568
## 
## $rope
## [1] 0.0001785595
## 
## $right
## [1] 0.9994179
# Rope Plot: plot the rope() result for the fold differences, using the
# interval c(-0.01, 0.01).
plot(rope(diff_tda_pca_5.60.5_lr_n2_3_fold,c(-0.01,0.01)))

# BayesFactor t-test on the fold differences (currently disabled).
#bf_tda_pca_5.60.5_lr.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_lr_n2_3_fold))
#bf_tda_pca_5.60.5_lr.n2_3_fold

# One-sample t-test of the fold differences against a mean of 0.
# The printed "data:" line echoes this exact argument expression, so the call
# is left as written to keep the recorded output reproducible.
t.test(as.matrix(diff_tda_pca_5.60.5_lr_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.60.5_lr_n2_3_fold)
## t = 37.216, df = 2, p-value = 0.0007212
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.09669024 0.12197029
## sample estimates:
## mean of x 
## 0.1093303
### Test-set diff: lr_cf_ov_acc minus the Node 2 TDA-assisted test accuracy
diff_tda_pca_5.60.5_lr.n2_test <- lr_cf_ov_acc - ad_tda_pc_5.60.5_n2_lr_cf0_ov_acc
print(diff_tda_pca_5.60.5_lr.n2_test)
## Accuracy 
## 0.392199
## Bayesian Tests Test set diff

# Bayesian sign test on the single Node 2 test-set difference.
bst_tda_pca_5.60.5_lr.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_lr.n2_test), -0.01, 0.01
)
print(bst_tda_pca_5.60.5_lr.n2_test)
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" versus "right" for the test-set sign test.
bst_tda_pca_5.60.5_lr.n2_test_odds.left <- with(
  bst_tda_pca_5.60.5_lr.n2_test, probLeft / probRight
)
print(bst_tda_pca_5.60.5_lr.n2_test_odds.left)
## [1] 0
# Bayesian signed-rank test on the single Node 2 test-set difference.
bsr_tda_pca_5.60.5_lr.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_lr.n2_test), -0.01, 0.01
)
print(bsr_tda_pca_5.60.5_lr.n2_test)
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1571667
## 
## $winRight
## [1] 0.8428333
# Bayesian correlated t-test on the single Node 2 test-set difference.
# NOTE(review): the recorded result is NA for left/rope/right, presumably
# because one observation gives no variance estimate; confirm.
bct_tda_pca_5.60.5_lr.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_lr.n2_test), 0.1, -0.01, 0.01
)
print(bct_tda_pca_5.60.5_lr.n2_test)
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_lr.n2_test))

#BayesFactor
#bf_tda_pca_5.60.5_lr.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_lr.n2_test)) #bf_tda_pca_5.60.5_lr.n2_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_lr.n2_test))

##Node3

# Node 3: binomial GLM (logistic regression) trained through caret on
# tda.m_adult_5.60.5.n3.vec, resampled per `fitControl`, scored on Accuracy.
Adult_TDA_PC_5.60.5_n3_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n3.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Show the resampling summary of the Node 3 model.
print(Adult_TDA_PC_5.60.5_n3_LrFit0)
## Generalized Linear Model 
## 
## 15744 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 10496, 10496, 10496 
## Resampling results:
## 
##   Accuracy   Kappa   
##   0.7858867  0.317527
# Per-fold Accuracy/Kappa of the Node 3 model.
print(Adult_TDA_PC_5.60.5_n3_LrFit0$resample)
##    Accuracy     Kappa Resample
## 1 0.7877287 0.3279498    Fold1
## 2 0.7888720 0.3253192    Fold2
## 3 0.7810595 0.2993121    Fold3
# Per-fold CV accuracy of the Node 3 model (first column of $resample).
# Fix: this used to read Adult_TDA_PC_5.60.5_n2_LrFit0$resample[1], so the
# Node 3 "3-fold diff" and every test built on it silently reused the Node 2
# folds. The recorded 3-fold outputs later in this Node 3 section match the
# Node 2 values exactly and must be regenerated after this fix.
ad_tda_pc_5.60.5_n3_lr_fit_re <- Adult_TDA_PC_5.60.5_n3_LrFit0$resample[1]

# Coefficient table and fit diagnostics of the Node 3 model.
summary(Adult_TDA_PC_5.60.5_n3_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (11 not defined because of singularities)
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                     4.582e+12  4.340e+12   1.056 0.291058    
## V1                              1.392e-02  1.923e-03   7.241 4.46e-13 ***
## V2..                           -4.582e+12  4.340e+12  -1.056 0.291058    
## V2.Federal.gov                 -4.582e+12  4.340e+12  -1.056 0.291058    
## V2.Local.gov                   -4.582e+12  4.340e+12  -1.056 0.291058    
## V2.Never.worked                        NA         NA      NA       NA    
## V2.Private                     -4.582e+12  4.340e+12  -1.056 0.291058    
## V2.Self.emp.inc                -4.582e+12  4.340e+12  -1.056 0.291058    
## V2.Self.emp.not.inc            -4.582e+12  4.340e+12  -1.056 0.291058    
## V2.State.gov                   -4.582e+12  4.340e+12  -1.056 0.291058    
## V2.Without.pay                 -4.582e+12  4.340e+12  -1.056 0.291058    
## V3                              9.048e-07  2.000e-07   4.524 6.06e-06 ***
## V4.10th                        -7.706e-01  1.584e-01  -4.866 1.14e-06 ***
## V4.11th                        -8.344e-01  1.631e-01  -5.116 3.12e-07 ***
## V4.12th                        -5.064e-01  2.415e-01  -2.097 0.035970 *  
## V4.1st.4th                     -1.371e+00  4.832e-01  -2.837 0.004552 ** 
## V4.5th.6th                     -1.167e+00  3.041e-01  -3.839 0.000124 ***
## V4.7th.8th                     -1.599e+00  2.160e-01  -7.405 1.31e-13 ***
## V4.9th                         -1.214e+00  2.309e-01  -5.259 1.45e-07 ***
## V4.Assoc.acdm                  -3.880e-01  1.180e-01  -3.288 0.001008 ** 
## V4.Assoc.voc                   -2.549e-01  1.016e-01  -2.509 0.012092 *  
## V4.Bachelors                   -3.628e-01  7.009e-02  -5.176 2.26e-07 ***
## V4.Doctorate                    4.003e-01  1.956e-01   2.047 0.040671 *  
## V4.HS.grad                     -3.999e-01  5.627e-02  -7.106 1.20e-12 ***
## V4.Masters                     -3.452e-01  1.053e-01  -3.280 0.001040 ** 
## V4.Preschool                   -3.223e+01  6.791e+04   0.000 0.999621    
## V4.Prof.school                  1.446e-01  1.808e-01   0.800 0.423960    
## V4.Some.college                        NA         NA      NA       NA    
## V5                                     NA         NA      NA       NA    
## V6.Divorced                    -3.611e-01  1.766e-01  -2.045 0.040832 *  
## V6.Married.AF.spouse            2.217e+00  6.919e-01   3.203 0.001358 ** 
## V6.Married.civ.spouse           7.560e-01  3.157e-01   2.394 0.016651 *  
## V6.Married.spouse.absent       -2.923e-01  2.872e-01  -1.018 0.308726    
## V6.Never.married               -3.957e-01  1.838e-01  -2.153 0.031352 *  
## V6.Separated                   -2.464e-01  2.360e-01  -1.044 0.296434    
## V6.Widowed                             NA         NA      NA       NA    
## V7..                                   NA         NA      NA       NA    
## V7.Adm.clerical                 5.636e-01  1.104e-01   5.106 3.29e-07 ***
## V7.Armed.Forces                -2.483e+01  1.748e+05   0.000 0.999887    
## V7.Craft.repair                 1.507e-01  9.271e-02   1.626 0.103949    
## V7.Exec.managerial              3.818e-01  1.012e-01   3.773 0.000161 ***
## V7.Farming.fishing             -1.306e+00  2.163e-01  -6.039 1.55e-09 ***
## V7.Handlers.cleaners           -3.150e-01  1.519e-01  -2.075 0.038024 *  
## V7.Machine.op.inspct            4.006e-03  1.124e-01   0.036 0.971569    
## V7.Other.service               -2.905e-01  1.342e-01  -2.165 0.030399 *  
## V7.Priv.house.serv             -3.803e+00  3.995e+00  -0.952 0.341151    
## V7.Prof.specialty               2.737e-01  1.085e-01   2.522 0.011679 *  
## V7.Protective.serv              3.327e-01  1.581e-01   2.104 0.035376 *  
## V7.Sales                        4.347e-01  1.007e-01   4.317 1.58e-05 ***
## V7.Tech.support                 9.375e-01  1.325e-01   7.073 1.51e-12 ***
## V7.Transport.moving                    NA         NA      NA       NA    
## V8.Husband                     -3.633e-01  1.164e-01  -3.122 0.001799 ** 
## V8.Not.in.family                2.668e-01  2.831e-01   0.942 0.346048    
## V8.Other.relative              -4.646e-01  2.622e-01  -1.772 0.076466 .  
## V8.Own.child                   -5.271e-01  2.791e-01  -1.889 0.058932 .  
## V8.Unmarried                    3.425e-01  2.960e-01   1.157 0.247317    
## V8.Wife                                NA         NA      NA       NA    
## V9.Amer.Indian.Eskimo          -1.648e-02  2.434e-01  -0.068 0.946019    
## V9.Asian.Pac.Islander           4.562e-01  1.772e-01   2.574 0.010039 *  
## V9.Black                        5.699e-01  8.518e-02   6.691 2.22e-11 ***
## V9.Other                        2.357e-01  2.941e-01   0.801 0.422909    
## V9.White                               NA         NA      NA       NA    
## V10.Female                      1.112e+00  9.682e-02  11.487  < 2e-16 ***
## V10.Male                               NA         NA      NA       NA    
## V11                             3.058e-04  1.211e-05  25.244  < 2e-16 ***
## V12                             3.456e-04  4.594e-05   7.523 5.33e-14 ***
## V13                             1.414e-02  1.951e-03   7.247 4.26e-13 ***
## V14..                          -6.466e-01  8.317e-01  -0.777 0.436930    
## V14.Cambodia                    6.247e-01  1.027e+00   0.608 0.543006    
## V14.Canada                     -4.614e-01  8.800e-01  -0.524 0.600107    
## V14.China                      -1.712e+00  9.647e-01  -1.775 0.075884 .  
## V14.Columbia                   -2.332e+00  1.317e+00  -1.771 0.076622 .  
## V14.Cuba                       -7.632e-02  8.938e-01  -0.085 0.931951    
## V14.Dominican.Republic         -1.770e+00  1.324e+00  -1.337 0.181342    
## V14.Ecuador                    -6.519e-01  1.128e+00  -0.578 0.563420    
## V14.El.Salvador                -1.134e+00  1.011e+00  -1.122 0.262008    
## V14.England                    -1.952e-01  8.959e-01  -0.218 0.827536    
## V14.France                     -3.075e-02  1.037e+00  -0.030 0.976332    
## V14.Germany                     7.704e-03  8.649e-01   0.009 0.992892    
## V14.Greece                     -1.755e+00  1.165e+00  -1.506 0.132101    
## V14.Guatemala                  -5.952e-01  1.108e+00  -0.537 0.591125    
## V14.Haiti                      -1.119e-01  1.078e+00  -0.104 0.917297    
## V14.Holand.Netherlands                 NA         NA      NA       NA    
## V14.Honduras                    2.470e+01  3.840e+05   0.000 0.999949    
## V14.Hong                       -3.123e-01  1.108e+00  -0.282 0.778044    
## V14.Hungary                    -1.493e+00  1.391e+00  -1.074 0.282939    
## V14.India                      -9.342e-01  9.080e-01  -1.029 0.303560    
## V14.Iran                       -1.073e+00  1.040e+00  -1.032 0.302243    
## V14.Ireland                    -3.300e-01  1.074e+00  -0.307 0.758710    
## V14.Italy                       7.604e-02  8.998e-01   0.085 0.932655    
## V14.Jamaica                    -1.725e-01  9.408e-01  -0.183 0.854512    
## V14.Japan                      -2.401e-01  9.347e-01  -0.257 0.797289    
## V14.Laos                       -1.481e+00  1.382e+00  -1.071 0.284098    
## V14.Mexico                     -1.074e+00  8.492e-01  -1.265 0.205822    
## V14.Nicaragua                  -1.131e+00  1.139e+00  -0.994 0.320430    
## V14.Outlying.US.Guam.USVI.etc. -2.574e+01  1.476e+05   0.000 0.999861    
## V14.Peru                       -1.406e+00  1.363e+00  -1.031 0.302445    
## V14.Philippines                -3.709e-02  8.643e-01  -0.043 0.965766    
## V14.Poland                     -5.510e-01  9.317e-01  -0.591 0.554294    
## V14.Portugal                   -7.682e-01  1.128e+00  -0.681 0.495841    
## V14.Puerto.Rico                -6.534e-01  9.283e-01  -0.704 0.481490    
## V14.Scotland                   -6.511e-01  1.263e+00  -0.516 0.606184    
## V14.South                      -1.038e+00  9.463e-01  -1.097 0.272857    
## V14.Taiwan                     -4.993e-01  9.980e-01  -0.500 0.616876    
## V14.Thailand                   -1.574e+00  1.388e+00  -1.134 0.256650    
## V14.Trinadad.Tobago            -5.416e-01  1.212e+00  -0.447 0.654967    
## V14.United.States              -3.648e-01  8.159e-01  -0.447 0.654793    
## V14.Vietnam                    -1.364e+00  1.018e+00  -1.341 0.180068    
## V14.Yugoslavia                         NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 17906  on 15743  degrees of freedom
## Residual deviance: 14521  on 15646  degrees of freedom
## AIC: 14717
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the Node 3 model (50 features requested).
vip(Adult_TDA_PC_5.60.5_n3_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_PCA_5.60.5_n3_Lr1Fit TDA-Assited LR")

# Predictions of the Node 3 model on the test data frame.
pred0 <- predict(Adult_TDA_PC_5.60.5_n3_LrFit0, newdata = adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix: Node 3 test-set predictions against the observed labels.
ad_tda_pc_5.60.5_n3_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
print(ad_tda_pc_5.60.5_n3_lr_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6827  1701
##      >50K     589   651
##                                          
##                Accuracy : 0.7656         
##                  95% CI : (0.757, 0.7739)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 0.07246        
##                                          
##                   Kappa : 0.2354         
##                                          
##  Mcnemar's Test P-Value : < 2e-16        
##                                          
##             Sensitivity : 0.9206         
##             Specificity : 0.2768         
##          Pos Pred Value : 0.8005         
##          Neg Pred Value : 0.5250         
##              Prevalence : 0.7592         
##          Detection Rate : 0.6989         
##    Detection Prevalence : 0.8731         
##       Balanced Accuracy : 0.5987         
##                                          
##        'Positive' Class :  <=50K         
## 
# Second display of the same Node 3 confusion matrix.
print(ad_tda_pc_5.60.5_n3_lr_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6827  1701
##      >50K     589   651
##                                          
##                Accuracy : 0.7656         
##                  95% CI : (0.757, 0.7739)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 0.07246        
##                                          
##                   Kappa : 0.2354         
##                                          
##  Mcnemar's Test P-Value : < 2e-16        
##                                          
##             Sensitivity : 0.9206         
##             Specificity : 0.2768         
##          Pos Pred Value : 0.8005         
##          Neg Pred Value : 0.5250         
##              Prevalence : 0.7592         
##          Detection Rate : 0.6989         
##    Detection Prevalence : 0.8731         
##       Balanced Accuracy : 0.5987         
##                                          
##        'Positive' Class :  <=50K         
## 
# Overall statistics of the Node 3 confusion matrix.
print(ad_tda_pc_5.60.5_n3_lr_cf0$overall)
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.655610e-01   2.353541e-01   7.570300e-01   7.739334e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   7.246161e-02  3.104101e-119
# Keep the first element of $overall (Accuracy) for Node 3, then show the
# per-class statistics.
ad_tda_pc_5.60.5_n3_lr_cf0_ov_acc <- ad_tda_pc_5.60.5_n3_lr_cf0$overall[1L]
print(ad_tda_pc_5.60.5_n3_lr_cf0$byClass)
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9205771            0.2767857            0.8005394 
##       Neg Pred Value            Precision               Recall 
##            0.5250000            0.8005394            0.9205771 
##                   F1           Prevalence       Detection Rate 
##            0.8563723            0.7592138            0.6989148 
## Detection Prevalence    Balanced Accuracy 
##            0.8730549            0.5986814
# Precision, Recall and F1 (entries 5 to 7 of $byClass) for Node 3.
ad_tda_pc_5.60.5_n3_lr_cf0_pre_rec_f1 <- ad_tda_pc_5.60.5_n3_lr_cf0$byClass[c("Precision", "Recall", "F1")]

###### Initial Bayesian comparison: non-TDA-assisted LR vs. TDA-assisted LR (Node 3)

### 3-fold diff: ad_lr_fit_re minus ad_tda_pc_5.60.5_n3_lr_fit_re

diff_tda_pca_5.60.5_lr_n3_3_fold <- ad_lr_fit_re - ad_tda_pc_5.60.5_n3_lr_fit_re
print(diff_tda_pca_5.60.5_lr_n3_3_fold)
##    Accuracy
## 1 0.1052959
## 2 0.1076483
## 3 0.1150466
## Bayesian Tests 3-fold diff

# Bayesian sign test on the Node 3 3-fold differences.
# NOTE(review): -0.01 / 0.01 are presumably the ROPE limits; confirm against
# the BayesianSignTest definition.
bst_tda_pca_5.60.5_lr.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_lr_n3_3_fold), -0.01, 0.01
)
print(bst_tda_pca_5.60.5_lr.n3_3_fold)
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds of "left" versus "right" from the sign test (probLeft / probRight).
bst_tda_pca_5.60.5_lr.n3_3_fold_odds.left <- with(
  bst_tda_pca_5.60.5_lr.n3_3_fold, probLeft / probRight
)
print(bst_tda_pca_5.60.5_lr.n3_3_fold_odds.left)
## [1] 0
# Bayesian signed-rank test on the Node 3 3-fold differences, same numeric
# bounds as the sign test.
bsr_tda_pca_5.60.5_lr.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_lr_n3_3_fold), -0.01, 0.01
)
print(bsr_tda_pca_5.60.5_lr.n3_3_fold)
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.008533333
## 
## $winRight
## [1] 0.9914667
# Bayesian correlated t-test on the Node 3 3-fold differences.
# NOTE(review): 0.1 is presumably the assumed fold correlation; confirm
# against the correlatedBayesianTtest definition.
bct_tda_pca_5.60.5_lr.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_lr_n3_3_fold), 0.1, -0.01, 0.01
)
print(bct_tda_pca_5.60.5_lr.n3_3_fold)
## $left
## [1] 0.0004035568
## 
## $rope
## [1] 0.0001785595
## 
## $right
## [1] 0.9994179
# ROPE plot of the Node 3 3-fold differences over c(-0.01, 0.01).
plot(rope(diff_tda_pca_5.60.5_lr_n3_3_fold, range = c(-0.01, 0.01)))

# Bayes factor (disabled)
#bf_tda_pca_5.60.5_lr.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_lr_n3_3_fold))
#bf_tda_pca_5.60.5_lr.n3_3_fold

# One-sample t-test of the 3-fold differences (default null: mean equal to 0).
print(t.test(as.matrix(diff_tda_pca_5.60.5_lr_n3_3_fold)))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.60.5_lr_n3_3_fold)
## t = 37.216, df = 2, p-value = 0.0007212
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.09669024 0.12197029
## sample estimates:
## mean of x 
## 0.1093303
### Test-set diff: lr_cf_ov_acc minus the Node 3 TDA-assisted test accuracy
diff_tda_pca_5.60.5_lr.n3_test <- lr_cf_ov_acc - ad_tda_pc_5.60.5_n3_lr_cf0_ov_acc
print(diff_tda_pca_5.60.5_lr.n3_test)
##   Accuracy 
## 0.08773546
## Bayesian Tests Test set diff

# Bayesian sign test on the single Node 3 test-set difference.
bst_tda_pca_5.60.5_lr.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_lr.n3_test), -0.01, 0.01
)
print(bst_tda_pca_5.60.5_lr.n3_test)
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" versus "right" for the test-set sign test.
bst_tda_pca_5.60.5_lr.n3_test_odds.left <- with(
  bst_tda_pca_5.60.5_lr.n3_test, probLeft / probRight
)
print(bst_tda_pca_5.60.5_lr.n3_test_odds.left)
## [1] 0
# Bayesian signed-rank test on the single Node 3 test-set difference.
bsr_tda_pca_5.60.5_lr.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_lr.n3_test), -0.01, 0.01
)
print(bsr_tda_pca_5.60.5_lr.n3_test)
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1600667
## 
## $winRight
## [1] 0.8399333
# Bayesian correlated t-test on the single Node 3 test-set difference.
# NOTE(review): the recorded result is NA for left/rope/right, presumably
# because one observation gives no variance estimate; confirm.
bct_tda_pca_5.60.5_lr.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_lr.n3_test), 0.1, -0.01, 0.01
)
print(bct_tda_pca_5.60.5_lr.n3_test)
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_lr.n3_test))

#BayesFactor
#bf_tda_pca_5.60.5_lr.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_lr.n3_test)) #bf_tda_pca_5.60.5_lr.n3_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_lr.n3_test))

##Node4

# Node 4: binomial GLM (logistic regression) trained through caret on
# tda.m_adult_5.60.5.n4.vec, resampled per `fitControl`, scored on Accuracy.
Adult_TDA_PC_5.60.5_n4_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n4.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Show the resampling summary of the Node 4 model.
print(Adult_TDA_PC_5.60.5_n4_LrFit0)
## Generalized Linear Model 
## 
## 19829 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 13220, 13219, 13219 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.9419035  0.3381238
# Per-fold Accuracy/Kappa of the Node 4 model.
print(Adult_TDA_PC_5.60.5_n4_LrFit0$resample)
##    Accuracy     Kappa Resample
## 1 0.9470419 0.4066884    Fold1
## 2 0.9403933 0.3012704    Fold2
## 3 0.9382753 0.3064126    Fold3
# Per-fold CV accuracy of the Node 4 model (first column of $resample).
ad_tda_pc_5.60.5_n4_lr_fit_re <- Adult_TDA_PC_5.60.5_n4_LrFit0$resample[1L]

# Coefficient table and fit diagnostics of the Node 4 model.
print(summary(Adult_TDA_PC_5.60.5_n4_LrFit0))
## 
## Call:
## NULL
## 
## Coefficients: (9 not defined because of singularities)
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                    -5.305e+12  1.359e+13  -0.390 0.696199    
## V1                              2.776e-02  3.297e-03   8.420  < 2e-16 ***
## V2..                            5.305e+12  1.359e+13   0.390 0.696199    
## V2.Federal.gov                  5.305e+12  1.359e+13   0.390 0.696199    
## V2.Local.gov                    5.305e+12  1.359e+13   0.390 0.696199    
## V2.Never.worked                 5.305e+12  1.359e+13   0.390 0.696199    
## V2.Private                      5.305e+12  1.359e+13   0.390 0.696199    
## V2.Self.emp.inc                 5.305e+12  1.359e+13   0.390 0.696199    
## V2.Self.emp.not.inc             5.305e+12  1.359e+13   0.390 0.696199    
## V2.State.gov                    5.305e+12  1.359e+13   0.390 0.696199    
## V2.Without.pay                  5.305e+12  1.359e+13   0.390 0.696199    
## V3                              6.440e-07  3.267e-07   1.971 0.048707 *  
## V4.10th                        -1.270e+00  3.938e-01  -3.225 0.001259 ** 
## V4.11th                        -4.071e-01  2.854e-01  -1.427 0.153723    
## V4.12th                        -8.291e-01  4.464e-01  -1.857 0.063261 .  
## V4.1st.4th                     -1.628e+00  1.065e+00  -1.529 0.126354    
## V4.5th.6th                     -1.222e+00  6.613e-01  -1.848 0.064662 .  
## V4.7th.8th                     -1.208e+00  4.595e-01  -2.628 0.008593 ** 
## V4.9th                         -4.071e-01  3.750e-01  -1.086 0.277622    
## V4.Assoc.acdm                   1.706e-01  1.730e-01   0.986 0.323958    
## V4.Assoc.voc                    2.550e-01  1.642e-01   1.553 0.120395    
## V4.Bachelors                    5.677e-01  1.074e-01   5.284 1.26e-07 ***
## V4.Doctorate                    1.557e+00  2.909e-01   5.351 8.76e-08 ***
## V4.HS.grad                     -3.858e-01  1.025e-01  -3.766 0.000166 ***
## V4.Masters                      6.479e-01  1.577e-01   4.110 3.96e-05 ***
## V4.Preschool                   -4.190e+01  1.103e+07   0.000 0.999997    
## V4.Prof.school                  8.962e-01  2.994e-01   2.993 0.002763 ** 
## V4.Some.college                        NA         NA      NA       NA    
## V5                                     NA         NA      NA       NA    
## V6.Divorced                     3.473e-02  1.737e-01   0.200 0.841511    
## V6.Married.AF.spouse            2.923e+00  7.134e-01   4.097 4.18e-05 ***
## V6.Married.civ.spouse           1.946e+00  3.627e-01   5.366 8.05e-08 ***
## V6.Married.spouse.absent       -1.693e-02  2.975e-01  -0.057 0.954608    
## V6.Never.married               -2.070e-01  1.897e-01  -1.091 0.275098    
## V6.Separated                   -1.458e-01  2.399e-01  -0.608 0.543274    
## V6.Widowed                             NA         NA      NA       NA    
## V7..                                   NA         NA      NA       NA    
## V7.Adm.clerical                -3.826e-02  2.230e-01  -0.172 0.863768    
## V7.Armed.Forces                -2.332e+01  1.260e+05   0.000 0.999852    
## V7.Craft.repair                 1.128e-01  2.336e-01   0.483 0.629119    
## V7.Exec.managerial              4.956e-01  2.222e-01   2.231 0.025686 *  
## V7.Farming.fishing             -1.435e+00  5.115e-01  -2.806 0.005017 ** 
## V7.Handlers.cleaners           -9.752e-01  3.890e-01  -2.507 0.012176 *  
## V7.Machine.op.inspct           -9.598e-01  2.969e-01  -3.232 0.001228 ** 
## V7.Other.service               -7.050e-01  2.497e-01  -2.824 0.004747 ** 
## V7.Priv.house.serv             -4.524e+00  2.340e+00  -1.933 0.053264 .  
## V7.Prof.specialty               2.273e-01  2.284e-01   0.995 0.319643    
## V7.Protective.serv              8.066e-01  3.098e-01   2.604 0.009218 ** 
## V7.Sales                        2.258e-01  2.264e-01   0.998 0.318509    
## V7.Tech.support                 3.456e-01  2.608e-01   1.325 0.185172    
## V7.Transport.moving                    NA         NA      NA       NA    
## V8.Husband                     -6.587e+00  5.914e-01 -11.139  < 2e-16 ***
## V8.Not.in.family               -6.209e-01  3.335e-01  -1.862 0.062662 .  
## V8.Other.relative              -1.940e+00  3.257e-01  -5.958 2.56e-09 ***
## V8.Own.child                   -1.766e+00  3.248e-01  -5.437 5.40e-08 ***
## V8.Unmarried                   -6.939e-01  3.429e-01  -2.024 0.042995 *  
## V8.Wife                                NA         NA      NA       NA    
## V9.Amer.Indian.Eskimo           4.993e-02  3.405e-01   0.147 0.883427    
## V9.Asian.Pac.Islander           1.831e-01  2.717e-01   0.674 0.500417    
## V9.Black                       -1.246e-01  1.249e-01  -0.997 0.318693    
## V9.Other                        1.680e-01  4.192e-01   0.401 0.688620    
## V9.White                               NA         NA      NA       NA    
## V10.Female                     -2.862e-01  9.215e-02  -3.106 0.001894 ** 
## V10.Male                               NA         NA      NA       NA    
## V11                             3.687e-04  1.590e-05  23.189  < 2e-16 ***
## V12                             5.003e-04  7.319e-05   6.836 8.17e-12 ***
## V13                             2.709e-02  3.056e-03   8.866  < 2e-16 ***
## V14..                          -2.295e+00  1.292e+00  -1.777 0.075619 .  
## V14.Cambodia                   -3.397e+00  2.440e+00  -1.392 0.163797    
## V14.Canada                     -2.147e+00  1.362e+00  -1.576 0.114969    
## V14.China                      -2.031e+00  1.434e+00  -1.416 0.156711    
## V14.Columbia                   -2.862e+00  1.651e+00  -1.734 0.083007 .  
## V14.Cuba                       -2.645e+00  1.423e+00  -1.859 0.063092 .  
## V14.Dominican.Republic         -2.946e+00  1.659e+00  -1.776 0.075798 .  
## V14.Ecuador                    -1.269e+00  1.684e+00  -0.754 0.451137    
## V14.El.Salvador                -2.178e+00  1.514e+00  -1.438 0.150320    
## V14.England                    -2.254e+00  1.378e+00  -1.636 0.101755    
## V14.France                     -1.722e+00  1.480e+00  -1.163 0.244717    
## V14.Germany                    -2.260e+00  1.353e+00  -1.670 0.094911 .  
## V14.Greece                     -1.731e+00  1.700e+00  -1.018 0.308684    
## V14.Guatemala                  -7.303e-01  1.498e+00  -0.487 0.625997    
## V14.Haiti                      -1.757e+00  1.536e+00  -1.144 0.252519    
## V14.Holand.Netherlands         -2.393e+01  3.511e+05   0.000 0.999946    
## V14.Honduras                   -2.518e+01  1.051e+05   0.000 0.999809    
## V14.Hong                       -2.495e+00  1.750e+00  -1.426 0.154003    
## V14.Hungary                    -2.131e+00  1.700e+00  -1.254 0.209987    
## V14.India                      -1.853e+00  1.417e+00  -1.308 0.190981    
## V14.Iran                       -2.620e+01  8.169e+04   0.000 0.999744    
## V14.Ireland                    -2.059e+00  1.658e+00  -1.242 0.214323    
## V14.Italy                      -8.864e-01  1.388e+00  -0.639 0.523045    
## V14.Jamaica                    -1.427e+00  1.446e+00  -0.987 0.323868    
## V14.Japan                      -9.820e-01  1.391e+00  -0.706 0.480183    
## V14.Laos                       -1.933e+00  1.718e+00  -1.125 0.260601    
## V14.Mexico                     -2.510e+00  1.332e+00  -1.884 0.059546 .  
## V14.Nicaragua                  -1.809e+00  1.724e+00  -1.049 0.293962    
## V14.Outlying.US.Guam.USVI.etc. -2.642e+01  9.785e+04   0.000 0.999785    
## V14.Peru                       -2.528e+01  6.768e+04   0.000 0.999702    
## V14.Philippines                -1.991e+00  1.345e+00  -1.480 0.138777    
## V14.Poland                     -1.990e+00  1.481e+00  -1.344 0.178998    
## V14.Portugal                   -1.501e+00  1.528e+00  -0.982 0.325986    
## V14.Puerto.Rico                -2.261e+00  1.373e+00  -1.647 0.099634 .  
## V14.Scotland                   -2.751e+00  1.801e+00  -1.527 0.126728    
## V14.South                      -3.475e+00  1.529e+00  -2.273 0.023021 *  
## V14.Taiwan                     -1.683e+00  1.482e+00  -1.136 0.255930    
## V14.Thailand                   -2.815e+00  1.765e+00  -1.595 0.110661    
## V14.Trinadad.Tobago            -2.597e+01  9.829e+04   0.000 0.999789    
## V14.United.States              -2.160e+00  1.265e+00  -1.707 0.087761 .  
## V14.Vietnam                    -2.191e+00  1.538e+00  -1.424 0.154365    
## V14.Yugoslavia                         NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 9522.7  on 19828  degrees of freedom
## Residual deviance: 6139.2  on 19729  degrees of freedom
## AIC: 6339.2
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the Node 4 model (50 features requested).
vip(Adult_TDA_PC_5.60.5_n4_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_PCA_5.60.5_n4_Lr1Fit TDA-Assited LR")

# Predictions of the Node 4 model on the test data frame.
pred0 <- predict(Adult_TDA_PC_5.60.5_n4_LrFit0, newdata = adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix: Node 4 test-set predictions against the observed labels.
ad_tda_pc_5.60.5_n4_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
print(ad_tda_pc_5.60.5_n4_lr_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7356  2061
##      >50K      60   291
##                                          
##                Accuracy : 0.7829         
##                  95% CI : (0.7746, 0.791)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 1.727e-08      
##                                          
##                   Kappa : 0.163          
##                                          
##  Mcnemar's Test P-Value : < 2.2e-16      
##                                          
##             Sensitivity : 0.9919         
##             Specificity : 0.1237         
##          Pos Pred Value : 0.7811         
##          Neg Pred Value : 0.8291         
##              Prevalence : 0.7592         
##          Detection Rate : 0.7531         
##    Detection Prevalence : 0.9641         
##       Balanced Accuracy : 0.5578         
##                                          
##        'Positive' Class :  <=50K         
## 
# Second display of the same Node 4 confusion matrix.
print(ad_tda_pc_5.60.5_n4_lr_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7356  2061
##      >50K      60   291
##                                          
##                Accuracy : 0.7829         
##                  95% CI : (0.7746, 0.791)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 1.727e-08      
##                                          
##                   Kappa : 0.163          
##                                          
##  Mcnemar's Test P-Value : < 2.2e-16      
##                                          
##             Sensitivity : 0.9919         
##             Specificity : 0.1237         
##          Pos Pred Value : 0.7811         
##          Neg Pred Value : 0.8291         
##              Prevalence : 0.7592         
##          Detection Rate : 0.7531         
##    Detection Prevalence : 0.9641         
##       Balanced Accuracy : 0.5578         
##                                          
##        'Positive' Class :  <=50K         
## 
# Overall statistics of the Node 4 confusion matrix.
print(ad_tda_pc_5.60.5_n4_lr_cf0$overall)
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.828624e-01   1.629729e-01   7.745512e-01   7.910046e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.727476e-08   0.000000e+00
# Keep the first element of $overall (Accuracy) for Node 4, then show the
# per-class statistics.
ad_tda_pc_5.60.5_n4_lr_cf0_ov_acc <- ad_tda_pc_5.60.5_n4_lr_cf0$overall[1L]
print(ad_tda_pc_5.60.5_n4_lr_cf0$byClass)
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9919094            0.1237245            0.7811405 
##       Neg Pred Value            Precision               Recall 
##            0.8290598            0.7811405            0.9919094 
##                   F1           Prevalence       Detection Rate 
##            0.8739975            0.7592138            0.7530713 
## Detection Prevalence    Balanced Accuracy 
##            0.9640663            0.5578169
# Precision, Recall and F1 (entries 5 to 7 of $byClass) for Node 4.
ad_tda_pc_5.60.5_n4_lr_cf0_pre_rec_f1 <- ad_tda_pc_5.60.5_n4_lr_cf0$byClass[c("Precision", "Recall", "F1")]

###### Initial Bayesian comparison: non-TDA-assisted LR vs. TDA-assisted LR (Node 4)

### 3-fold diff: ad_lr_fit_re minus the Node 4 TDA-assisted fold accuracies

diff_tda_pca_5.60.5_lr_n4_3_fold <- ad_lr_fit_re - ad_tda_pc_5.60.5_n4_lr_fit_re
print(diff_tda_pca_5.60.5_lr_n4_3_fold)
##      Accuracy
## 1 -0.09734462
## 2 -0.09216378
## 3 -0.08291867
## Bayesian Tests 3-fold diff

# Bayesian sign test on the Node 4 3-fold differences.
# NOTE(review): -0.01 / 0.01 are presumably the ROPE limits; confirm against
# the BayesianSignTest definition.
bst_tda_pca_5.60.5_lr.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_lr_n4_3_fold), -0.01, 0.01
)
print(bst_tda_pca_5.60.5_lr.n4_3_fold)
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the sign test (probLeft / probRight).
# The recorded value is Inf because probRight is 0 for this node.
bst_tda_pca_5.60.5_lr.n4_3_fold_odds.left <- with(
  bst_tda_pca_5.60.5_lr.n4_3_fold, probLeft / probRight
)
print(bst_tda_pca_5.60.5_lr.n4_3_fold_odds.left)
## [1] Inf
# Bayesian signed-rank test on the Node 4 3-fold differences, same numeric
# bounds as the sign test.
bsr_tda_pca_5.60.5_lr.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_lr_n4_3_fold), -0.01, 0.01
)
print(bsr_tda_pca_5.60.5_lr.n4_3_fold)
## $winLeft
## [1] 0.9918667
## 
## $winRope
## [1] 0.008133333
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the Node 4 3-fold differences.
# NOTE(review): 0.1 is presumably the assumed fold correlation; confirm
# against the correlatedBayesianTtest definition.
bct_tda_pca_5.60.5_lr.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_lr_n4_3_fold), 0.1, -0.01, 0.01
)
print(bct_tda_pca_5.60.5_lr.n4_3_fold)
## $left
## [1] 0.9981925
## 
## $rope
## [1] 0.0006438001
## 
## $right
## [1] 0.001163699
# ROPE plot of the Node 4 3-fold differences over c(-0.01, 0.01).
plot(rope(diff_tda_pca_5.60.5_lr_n4_3_fold, range = c(-0.01, 0.01)))

# Bayes factor (disabled)
#bf_tda_pca_5.60.5_lr.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_lr_n4_3_fold))
#bf_tda_pca_5.60.5_lr.n4_3_fold

# One-sample t-test of the 3-fold differences (default null: mean equal to 0).
print(t.test(as.matrix(diff_tda_pca_5.60.5_lr_n4_3_fold)))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.60.5_lr_n4_3_fold)
## t = -21.523, df = 2, p-value = 0.002152
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1089625 -0.0726555
## sample estimates:
##   mean of x 
## -0.09080902
### Test set diff
# Single test-set accuracy gap: lr_cf_ov_acc minus the node-4 test accuracy.
# A positive value means the node-4 model scored lower on the test set.
diff_tda_pca_5.60.5_lr.n4_test <-
  lr_cf_ov_acc - ad_tda_pc_5.60.5_n4_lr_cf0_ov_acc
diff_tda_pca_5.60.5_lr.n4_test
##   Accuracy 
## 0.07043407
## Bayesian Tests Test set diff

# Bayesian Sign Test

# NOTE(review): only one difference (the test-set gap) is supplied here, so
# the result rests on a single observation.
bst_tda_pca_5.60.5_lr.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_lr.n4_test), -0.01, 0.01
)
bst_tda_pca_5.60.5_lr.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the sign test (Inf/NaN if probRight is 0).
bst_tda_pca_5.60.5_lr.n4_test_odds.left <- with(
  bst_tda_pca_5.60.5_lr.n4_test,
  probLeft / probRight
)
bst_tda_pca_5.60.5_lr.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

# Signed-rank test on the single test-set difference, same -0.01 / 0.01 limits.
bsr_tda_pca_5.60.5_lr.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_lr.n4_test), -0.01, 0.01
)
bsr_tda_pca_5.60.5_lr.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1604667
## 
## $winRight
## [1] 0.8395333
# Bayesian Correlated Test
# With a single difference this returns NA for all three components (see the
# recorded output that follows).
bct_tda_pca_5.60.5_lr.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_lr.n4_test), 0.1, -0.01, 0.01
)
bct_tda_pca_5.60.5_lr.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_lr.n4_test)))

#BayesFactor
#bf_tda_pca_5.60.5_lr.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_lr.n4_test)) #bf_tda_pca_5.60.5_lr.n4_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_lr.n4_test))

##Node5

# Logistic regression (glm, binomial family) of adult_df1 on every other
# column of tda.m_adult_5.60.5.n5.vec, resampled as configured in fitControl
# and scored on accuracy.
# NOTE(review): the recorded warnings report non-convergence, fitted
# probabilities of 0/1 and a rank-deficient fit, so treat the coefficients of
# this model with caution.
Adult_TDA_PC_5.60.5_n5_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n5.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Adult_TDA_PC_5.60.5_n5_LrFit0
## Generalized Linear Model 
## 
## 16508 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 11006, 11004, 11006 
## Resampling results:
## 
##   Accuracy   Kappa     
##   0.9792246  0.01033087
# Per-fold resampling metrics of the node-5 fit.
Adult_TDA_PC_5.60.5_n5_LrFit0$resample
##    Accuracy        Kappa Resample
## 1 0.9885496 -0.004986866    Fold1
## 2 0.9591206  0.004610428    Fold2
## 3 0.9900036  0.031369034    Fold3
# Keep the fold accuracies as a one-column data frame for the fold-wise
# comparison. The column is selected by name rather than by position so a
# change in the column order of $resample cannot pick up Kappa by mistake.
ad_tda_pc_5.60.5_n5_lr_fit_re <-
  Adult_TDA_PC_5.60.5_n5_LrFit0$resample["Accuracy"]

# Coefficient table and deviance figures of the fitted model.
summary(Adult_TDA_PC_5.60.5_n5_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (10 not defined because of singularities)
##                                  Estimate Std. Error    z value Pr(>|z|)    
## (Intercept)                    -2.936e+15  4.126e+07  -71154738   <2e-16 ***
## V1                              2.795e+12  5.604e+04   49876062   <2e-16 ***
## V2..                            1.720e+15  2.273e+07   75673437   <2e-16 ***
## V2.Federal.gov                  5.023e+15  2.273e+07  220983147   <2e-16 ***
## V2.Local.gov                    4.783e+15  2.259e+07  211738434   <2e-16 ***
## V2.Never.worked                 1.614e+15  3.402e+07   47436536   <2e-16 ***
## V2.Private                      1.368e+15  2.247e+07   60866370   <2e-16 ***
## V2.Self.emp.inc                 4.270e+15  2.328e+07  183431419   <2e-16 ***
## V2.Self.emp.not.inc             6.485e+14  2.263e+07   28664230   <2e-16 ***
## V2.State.gov                    1.237e+15  2.264e+07   54655129   <2e-16 ***
## V2.Without.pay                         NA         NA         NA       NA    
## V3                             -1.310e+08  4.998e+00  -26217733   <2e-16 ***
## V4.10th                        -1.187e+15  2.964e+06 -400354538   <2e-16 ***
## V4.11th                        -3.707e+13  2.565e+06  -14450207   <2e-16 ***
## V4.12th                        -4.253e+11  3.914e+06    -108653   <2e-16 ***
## V4.1st.4th                     -6.758e+14  7.196e+06  -93908497   <2e-16 ***
## V4.5th.6th                      3.554e+14  4.552e+06   78080360   <2e-16 ***
## V4.7th.8th                     -4.865e+13  4.112e+06  -11829978   <2e-16 ***
## V4.9th                          6.394e+13  4.097e+06   15605585   <2e-16 ***
## V4.Assoc.acdm                  -1.205e+14  3.003e+06  -40127749   <2e-16 ***
## V4.Assoc.voc                   -1.032e+14  2.837e+06  -36378165   <2e-16 ***
## V4.Bachelors                   -1.550e+14  1.956e+06  -79203626   <2e-16 ***
## V4.Doctorate                    1.783e+15  1.417e+07  125811363   <2e-16 ***
## V4.HS.grad                     -8.817e+13  1.392e+06  -63365127   <2e-16 ***
## V4.Masters                     -6.340e+13  3.804e+06  -16667070   <2e-16 ***
## V4.Preschool                   -2.061e+15  1.117e+07 -184560787   <2e-16 ***
## V4.Prof.school                  1.630e+15  1.031e+07  158096449   <2e-16 ***
## V4.Some.college                        NA         NA         NA       NA    
## V5                                     NA         NA         NA       NA    
## V6.Divorced                     3.783e+13  2.711e+06   13952512   <2e-16 ***
## V6.Married.AF.spouse            1.308e+15  2.112e+07   61938495   <2e-16 ***
## V6.Married.civ.spouse           1.030e+14  6.505e+06   15831397   <2e-16 ***
## V6.Married.spouse.absent        3.750e+13  4.245e+06    8834211   <2e-16 ***
## V6.Never.married                4.887e+13  2.955e+06   16539569   <2e-16 ***
## V6.Separated                   -3.250e+13  3.378e+06   -9622182   <2e-16 ***
## V6.Widowed                             NA         NA         NA       NA    
## V7..                                   NA         NA         NA       NA    
## V7.Adm.clerical                -3.215e+15  3.256e+06 -987396861   <2e-16 ***
## V7.Armed.Forces                -7.024e+15  2.777e+07 -252902842   <2e-16 ***
## V7.Craft.repair                -3.175e+15  3.399e+06 -934022857   <2e-16 ***
## V7.Exec.managerial             -3.161e+15  3.608e+06 -875992005   <2e-16 ***
## V7.Farming.fishing             -5.693e+14  4.612e+06 -123425649   <2e-16 ***
## V7.Handlers.cleaners           -3.228e+15  3.641e+06 -886584972   <2e-16 ***
## V7.Machine.op.inspct           -3.252e+15  3.561e+06 -913261254   <2e-16 ***
## V7.Other.service               -3.178e+15  3.221e+06 -986546973   <2e-16 ***
## V7.Priv.house.serv             -3.194e+15  6.425e+06 -497125067   <2e-16 ***
## V7.Prof.specialty              -3.047e+15  3.662e+06 -832039959   <2e-16 ***
## V7.Protective.serv             -2.911e+15  5.518e+06 -527500316   <2e-16 ***
## V7.Sales                       -3.108e+15  3.357e+06 -925978357   <2e-16 ***
## V7.Tech.support                -3.230e+15  4.265e+06 -757421188   <2e-16 ***
## V7.Transport.moving                    NA         NA         NA       NA    
## V8.Husband                     -1.902e+14  1.739e+07  -10932429   <2e-16 ***
## V8.Not.in.family                9.589e+13  6.529e+06   14686788   <2e-16 ***
## V8.Other.relative               3.574e+13  6.477e+06    5518054   <2e-16 ***
## V8.Own.child                    8.360e+13  6.500e+06   12861365   <2e-16 ***
## V8.Unmarried                    8.373e+13  6.612e+06   12663374   <2e-16 ***
## V8.Wife                                NA         NA         NA       NA    
## V9.Amer.Indian.Eskimo           1.125e+14  4.866e+06   23118239   <2e-16 ***
## V9.Asian.Pac.Islander           4.161e+13  4.308e+06    9658458   <2e-16 ***
## V9.Black                        6.112e+13  1.597e+06   38268207   <2e-16 ***
## V9.Other                        3.183e+14  5.163e+06   61648817   <2e-16 ***
## V9.White                               NA         NA         NA       NA    
## V10.Female                      1.187e+14  1.280e+06   92740768   <2e-16 ***
## V10.Male                               NA         NA         NA       NA    
## V11                             7.832e+10  3.472e+02  225608643   <2e-16 ***
## V12                            -5.422e+10  1.860e+03  -29141741   <2e-16 ***
## V13                             1.081e+12  4.891e+04   22096896   <2e-16 ***
## V14..                          -5.132e+13  3.384e+07   -1516483   <2e-16 ***
## V14.Cambodia                   -1.154e+14  4.134e+07   -2790132   <2e-16 ***
## V14.Canada                      1.607e+15  3.480e+07   46194406   <2e-16 ***
## V14.China                       2.410e+15  3.617e+07   66642546   <2e-16 ***
## V14.Columbia                    3.040e+15  3.520e+07   86358347   <2e-16 ***
## V14.Cuba                       -1.292e+14  3.500e+07   -3692668   <2e-16 ***
## V14.Dominican.Republic          3.231e+15  3.483e+07   92762519   <2e-16 ***
## V14.Ecuador                    -2.158e+14  3.762e+07   -5735603   <2e-16 ***
## V14.El.Salvador                -2.142e+14  3.452e+07   -6205917   <2e-16 ***
## V14.England                     3.269e+14  3.523e+07    9279923   <2e-16 ***
## V14.France                      2.065e+15  3.977e+07   51937536   <2e-16 ***
## V14.Germany                     2.223e+15  3.457e+07   64321883   <2e-16 ***
## V14.Greece                     -1.385e+14  4.115e+07   -3365698   <2e-16 ***
## V14.Guatemala                   2.086e+15  3.499e+07   59615589   <2e-16 ***
## V14.Haiti                       2.355e+15  3.546e+07   66407367   <2e-16 ***
## V14.Holand.Netherlands          1.036e+14  7.523e+07    1376528   <2e-16 ***
## V14.Honduras                    1.695e+15  3.882e+07   43659366   <2e-16 ***
## V14.Hong                        2.794e+15  3.989e+07   70036978   <2e-16 ***
## V14.Hungary                     6.959e+14  4.213e+07   16517752   <2e-16 ***
## V14.India                       7.609e+14  3.620e+07   21020539   <2e-16 ***
## V14.Iran                       -1.041e+15  3.974e+07  -26209066   <2e-16 ***
## V14.Ireland                     6.450e+14  3.756e+07   17170262   <2e-16 ***
## V14.Italy                       2.388e+15  3.667e+07   65124246   <2e-16 ***
## V14.Jamaica                     3.887e+14  3.471e+07   11197872   <2e-16 ***
## V14.Japan                       4.693e+14  3.597e+07   13048901   <2e-16 ***
## V14.Laos                        6.471e+14  3.947e+07   16394801   <2e-16 ***
## V14.Mexico                      4.010e+14  3.381e+07   11860351   <2e-16 ***
## V14.Nicaragua                   3.787e+15  3.654e+07  103631629   <2e-16 ***
## V14.Outlying.US.Guam.USVI.etc.  3.143e+15  3.879e+07   81014532   <2e-16 ***
## V14.Peru                        3.152e+15  3.652e+07   86308999   <2e-16 ***
## V14.Philippines                 3.966e+15  3.419e+07  115989282   <2e-16 ***
## V14.Poland                     -8.437e+14  3.577e+07  -23586483   <2e-16 ***
## V14.Portugal                    4.216e+14  3.716e+07   11346808   <2e-16 ***
## V14.Puerto.Rico                 2.402e+15  3.448e+07   69667946   <2e-16 ***
## V14.Scotland                    2.255e+15  4.336e+07   52008946   <2e-16 ***
## V14.South                       2.206e+15  3.552e+07   62124248   <2e-16 ***
## V14.Taiwan                      1.321e+15  3.707e+07   35642271   <2e-16 ***
## V14.Thailand                    2.586e+15  3.900e+07   66305671   <2e-16 ***
## V14.Trinadad.Tobago             3.073e+15  3.883e+07   79134290   <2e-16 ***
## V14.United.States               2.717e+14  3.360e+07    8086328   <2e-16 ***
## V14.Vietnam                     3.196e+15  3.527e+07   90615666   <2e-16 ***
## V14.Yugoslavia                         NA         NA         NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance:  1518.4  on 16507  degrees of freedom
## Residual deviance: 25374.7  on 16409  degrees of freedom
## AIC: 25573
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-5 fit showing up to 50 features; the
# count is passed by name and the "Assited" typo in the title is corrected.
vip(Adult_TDA_PC_5.60.5_n5_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_PCA_5.60.5_n5_Lr1Fit TDA-Assisted LR")

# Score the held-out rows in adult.one_hot_df4Test with the node-5 model.
pred0 <- predict(
  Adult_TDA_PC_5.60.5_n5_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Cross-tabulate the predictions against the observed test labels.
# NOTE(review): the printed result reports ' <=50K' as the positive class, so
# the per-class metrics describe that class -- confirm this is intended.
ad_tda_pc_5.60.5_n5_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n5_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7290  2190
##      >50K     126   162
##                                           
##                Accuracy : 0.7629          
##                  95% CI : (0.7543, 0.7713)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.2006          
##                                           
##                   Kappa : 0.0741          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.98301         
##             Specificity : 0.06888         
##          Pos Pred Value : 0.76899         
##          Neg Pred Value : 0.56250         
##              Prevalence : 0.75921         
##          Detection Rate : 0.74631         
##    Detection Prevalence : 0.97052         
##       Balanced Accuracy : 0.52594         
##                                           
##        'Positive' Class :  <=50K          
## 
# Prints the same confusion matrix a second time; the object has not been
# modified since the previous print.
ad_tda_pc_5.60.5_n5_lr_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7290  2190
##      >50K     126   162
##                                           
##                Accuracy : 0.7629          
##                  95% CI : (0.7543, 0.7713)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.2006          
##                                           
##                   Kappa : 0.0741          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.98301         
##             Specificity : 0.06888         
##          Pos Pred Value : 0.76899         
##          Neg Pred Value : 0.56250         
##              Prevalence : 0.75921         
##          Detection Rate : 0.74631         
##    Detection Prevalence : 0.97052         
##       Balanced Accuracy : 0.52594         
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics of the node-5 confusion matrix.
ad_tda_pc_5.60.5_n5_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##     0.76289926     0.07408412     0.75433618     0.77130532     0.75921376 
## AccuracyPValue  McnemarPValue 
##     0.20059653     0.00000000
# Keep the accuracy only, selected by name instead of position 1 so the
# extraction does not depend on the ordering of $overall.
ad_tda_pc_5.60.5_n5_lr_cf0_ov_acc <-
  ad_tda_pc_5.60.5_n5_lr_cf0$overall["Accuracy"]
ad_tda_pc_5.60.5_n5_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##           0.98300971           0.06887755           0.76898734 
##       Neg Pred Value            Precision               Recall 
##           0.56250000           0.76898734           0.98300971 
##                   F1           Prevalence       Detection Rate 
##           0.86292614           0.75921376           0.74631450 
## Detection Prevalence    Balanced Accuracy 
##           0.97051597           0.52594363
# Precision, recall and F1, picked by name (positions 5:7 of the vector
# printed above) so a reordering of $byClass cannot silently swap metrics.
ad_tda_pc_5.60.5_n5_lr_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n5_lr_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers

### 3-fold diff

# Fold-wise accuracy gap: ad_lr_fit_re (presumably the non-TDA LR resample
# accuracies) minus the node-5 TDA-assisted LR accuracies. Negative values
# mean the node-5 fit scored higher on that fold.
diff_tda_pca_5.60.5_lr_n5_3_fold <-
  ad_lr_fit_re - ad_tda_pc_5.60.5_n5_lr_fit_re
diff_tda_pca_5.60.5_lr_n5_3_fold
##     Accuracy
## 1 -0.1388523
## 2 -0.1108911
## 3 -0.1346470
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

# -0.01 and 0.01 are presumably the lower/upper ROPE limits (the result
# carries a probRope component) -- confirm against BayesianSignTest().
bst_tda_pca_5.60.5_lr.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_lr_n5_3_fold), -0.01, 0.01
)
bst_tda_pca_5.60.5_lr.n5_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the sign test.
# NOTE(review): the ratio is Inf whenever probRight is 0 (as recorded below)
# and NaN when both are 0; guard before using it downstream.
bst_tda_pca_5.60.5_lr.n5_3_fold_odds.left <- with(
  bst_tda_pca_5.60.5_lr.n5_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.60.5_lr.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

# Same fold differences and the same -0.01 / 0.01 limits as the sign test.
bsr_tda_pca_5.60.5_lr.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_lr_n5_3_fold), -0.01, 0.01
)
bsr_tda_pca_5.60.5_lr.n5_3_fold
## $winLeft
## [1] 0.9913667
## 
## $winRope
## [1] 0.008633333
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# NOTE(review): 0.1 is presumably the between-fold correlation argument; for
# 3-fold CV the usual heuristic is 1/3 (test share of the data) -- TODO confirm.
bct_tda_pca_5.60.5_lr.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_lr_n5_3_fold), 0.1, -0.01, 0.01
)
bct_tda_pca_5.60.5_lr.n5_3_fold
## $left
## [1] 0.996419
## 
## $rope
## [1] 0.0009543443
## 
## $right
## [1] 0.002626614
# Rope Plot
# ROPE of +/- 0.01 accuracy around zero, applied to the fold differences.
plot(
  rope(diff_tda_pca_5.60.5_lr_n5_3_fold, range = c(-0.01, 0.01))
)

# BayesFactor (disabled)
#bf_tda_pca_5.60.5_lr.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_lr_n5_3_fold))
#bf_tda_pca_5.60.5_lr.n5_3_fold

# Frequentist cross-check: one-sample t-test of the fold differences against
# a true mean of 0.
t.test(as.matrix(diff_tda_pca_5.60.5_lr_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.60.5_lr_n5_3_fold)
## t = -14.72, df = 2, p-value = 0.004584
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.16558296 -0.09067728
## sample estimates:
##  mean of x 
## -0.1281301
### Test set diff
# Single test-set accuracy gap: lr_cf_ov_acc minus the node-5 test accuracy.
# A positive value means the node-5 model scored lower on the test set.
diff_tda_pca_5.60.5_lr.n5_test <-
  lr_cf_ov_acc - ad_tda_pc_5.60.5_n5_lr_cf0_ov_acc
diff_tda_pca_5.60.5_lr.n5_test
##   Accuracy 
## 0.09039722
## Bayesian Tests Test set diff

# Bayesian Sign Test

# NOTE(review): only one difference (the test-set gap) is supplied here, so
# the result rests on a single observation.
bst_tda_pca_5.60.5_lr.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_lr.n5_test), -0.01, 0.01
)
bst_tda_pca_5.60.5_lr.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the sign test (Inf/NaN if probRight is 0).
bst_tda_pca_5.60.5_lr.n5_test_odds.left <- with(
  bst_tda_pca_5.60.5_lr.n5_test,
  probLeft / probRight
)
bst_tda_pca_5.60.5_lr.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

# Signed-rank test on the single test-set difference, same -0.01 / 0.01 limits.
bsr_tda_pca_5.60.5_lr.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_lr.n5_test), -0.01, 0.01
)
bsr_tda_pca_5.60.5_lr.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1588
## 
## $winRight
## [1] 0.8412
# Bayesian Correlated Test
# With a single difference this returns NA for all three components (see the
# recorded output that follows).
bct_tda_pca_5.60.5_lr.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_lr.n5_test), 0.1, -0.01, 0.01
)
bct_tda_pca_5.60.5_lr.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_lr.n5_test)))

#BayesFactor
#bf_tda_pca_5.60.5_lr.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_lr.n5_test)) #bf_tda_pca_5.60.5_lr.n5_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_lr.n5_test))


##With TDA KDE filter 5 intervals, 60% overlap (matching the 5.60.5 object names), 5 bins
##Node1


# Logistic regression (glm, binomial family) of adult_df1 on every other
# column of tda.m_kde_adult_5.60.5.n1.vec, resampled as configured in
# fitControl and scored on accuracy.
# NOTE(review): the recorded warnings report non-convergence, fitted
# probabilities of 0/1 and a rank-deficient fit, so treat the coefficients of
# this model with caution.
Adult_TDA_KDE_5.60.5_n1_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n1.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Adult_TDA_KDE_5.60.5_n1_LrFit0
## Generalized Linear Model 
## 
## 15260 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 10173, 10174, 10173 
## Resampling results:
## 
##   Accuracy   Kappa   
##   0.8581258  0.615439
# Per-fold resampling metrics of the KDE node-1 fit.
Adult_TDA_KDE_5.60.5_n1_LrFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8594456 0.6184364    Fold1
## 2 0.8576484 0.6172245    Fold2
## 3 0.8572833 0.6106561    Fold3
# Keep the fold accuracies as a one-column data frame for the fold-wise
# comparison. The column is selected by name rather than by position so a
# change in the column order of $resample cannot pick up Kappa by mistake.
ad_tda_kde_5.60.5_n1_lr_fit_re <-
  Adult_TDA_KDE_5.60.5_n1_LrFit0$resample["Accuracy"]

# Coefficient table and deviance figures of the fitted model.
summary(Adult_TDA_KDE_5.60.5_n1_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (9 not defined because of singularities)
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                    -6.142e+12  1.239e+13  -0.496 0.620110    
## V1                              1.518e-02  2.306e-03   6.585 4.56e-11 ***
## V2..                            6.142e+12  1.239e+13   0.496 0.620110    
## V2.Federal.gov                  6.142e+12  1.239e+13   0.496 0.620110    
## V2.Local.gov                    6.142e+12  1.239e+13   0.496 0.620110    
## V2.Never.worked                 6.142e+12  1.239e+13   0.496 0.620110    
## V2.Private                      6.142e+12  1.239e+13   0.496 0.620110    
## V2.Self.emp.inc                 6.142e+12  1.239e+13   0.496 0.620110    
## V2.Self.emp.not.inc             6.142e+12  1.239e+13   0.496 0.620110    
## V2.State.gov                    6.142e+12  1.239e+13   0.496 0.620110    
## V2.Without.pay                  6.142e+12  1.239e+13   0.496 0.620110    
## V3                              7.344e-07  2.017e-07   3.641 0.000271 ***
## V4.10th                        -1.128e+00  1.674e-01  -6.742 1.56e-11 ***
## V4.11th                        -1.072e+00  1.671e-01  -6.413 1.42e-10 ***
## V4.12th                        -7.837e-01  3.087e-01  -2.538 0.011135 *  
## V4.1st.4th                     -1.700e+00  4.850e-01  -3.504 0.000457 ***
## V4.5th.6th                     -1.455e+00  3.062e-01  -4.752 2.01e-06 ***
## V4.7th.8th                     -1.572e+00  1.929e-01  -8.149 3.67e-16 ***
## V4.9th                         -1.287e+00  2.270e-01  -5.669 1.44e-08 ***
## V4.Assoc.acdm                   1.192e-01  1.470e-01   0.811 0.417351    
## V4.Assoc.voc                    1.628e-01  1.456e-01   1.118 0.263642    
## V4.Bachelors                    6.651e-01  9.143e-02   7.275 3.48e-13 ***
## V4.Doctorate                    1.899e+00  1.719e-01  11.049  < 2e-16 ***
## V4.HS.grad                     -4.291e-01  9.057e-02  -4.738 2.16e-06 ***
## V4.Masters                      1.127e+00  1.104e-01  10.205  < 2e-16 ***
## V4.Preschool                   -3.330e+01  9.685e+04   0.000 0.999726    
## V4.Prof.school                  1.666e+00  1.529e-01  10.901  < 2e-16 ***
## V4.Some.college                        NA         NA      NA       NA    
## V5                                     NA         NA      NA       NA    
## V6.Divorced                    -1.933e-01  1.782e-01  -1.085 0.278056    
## V6.Married.AF.spouse            2.466e+00  8.528e-01   2.892 0.003829 ** 
## V6.Married.civ.spouse           2.036e+00  4.216e-01   4.829 1.37e-06 ***
## V6.Married.spouse.absent       -2.048e-01  3.325e-01  -0.616 0.537886    
## V6.Never.married               -8.052e-01  1.894e-01  -4.252 2.12e-05 ***
## V6.Separated                   -3.555e-01  2.656e-01  -1.339 0.180721    
## V6.Widowed                             NA         NA      NA       NA    
## V7..                                   NA         NA      NA       NA    
## V7.Adm.clerical                -1.669e-01  1.540e-01  -1.084 0.278455    
## V7.Armed.Forces                -2.270e-01  2.296e+00  -0.099 0.921251    
## V7.Craft.repair                 1.588e-02  1.316e-01   0.121 0.903974    
## V7.Exec.managerial              7.165e-01  1.324e-01   5.413 6.19e-08 ***
## V7.Farming.fishing             -1.108e+00  2.005e-01  -5.526 3.27e-08 ***
## V7.Handlers.cleaners           -7.512e-01  2.356e-01  -3.188 0.001431 ** 
## V7.Machine.op.inspct           -5.187e-01  1.766e-01  -2.937 0.003312 ** 
## V7.Other.service               -1.083e+00  1.962e-01  -5.520 3.39e-08 ***
## V7.Priv.house.serv             -3.848e+00  1.747e+00  -2.203 0.027629 *  
## V7.Prof.specialty               3.384e-01  1.390e-01   2.434 0.014923 *  
## V7.Protective.serv              2.530e-01  2.085e-01   1.214 0.224793    
## V7.Sales                        1.023e-01  1.385e-01   0.739 0.459977    
## V7.Tech.support                 4.957e-01  1.875e-01   2.643 0.008205 ** 
## V7.Transport.moving                    NA         NA      NA       NA    
## V8.Husband                     -1.266e+00  1.450e-01  -8.731  < 2e-16 ***
## V8.Not.in.family               -5.156e-01  4.071e-01  -1.266 0.205375    
## V8.Other.relative              -1.278e+00  3.662e-01  -3.491 0.000482 ***
## V8.Own.child                   -1.723e+00  4.221e-01  -4.083 4.45e-05 ***
## V8.Unmarried                   -5.268e-01  4.189e-01  -1.258 0.208517    
## V8.Wife                                NA         NA      NA       NA    
## V9.Amer.Indian.Eskimo          -1.668e-01  2.913e-01  -0.573 0.566815    
## V9.Asian.Pac.Islander          -1.754e-02  2.423e-01  -0.072 0.942299    
## V9.Black                       -1.080e-01  1.103e-01  -0.979 0.327734    
## V9.Other                        2.535e-01  3.757e-01   0.675 0.499837    
## V9.White                               NA         NA      NA       NA    
## V10.Female                     -8.974e-01  1.066e-01  -8.419  < 2e-16 ***
## V10.Male                               NA         NA      NA       NA    
## V11                             3.052e-04  1.414e-05  21.586  < 2e-16 ***
## V12                             6.961e-04  5.292e-05  13.155  < 2e-16 ***
## V13                             3.004e-02  2.218e-03  13.540  < 2e-16 ***
## V14..                          -1.449e+00  1.137e+00  -1.275 0.202411    
## V14.Cambodia                   -3.782e-02  1.433e+00  -0.026 0.978950    
## V14.Canada                     -1.058e+00  1.172e+00  -0.903 0.366680    
## V14.China                      -1.976e+00  1.227e+00  -1.611 0.107143    
## V14.Columbia                   -2.998e+00  1.413e+00  -2.121 0.033931 *  
## V14.Cuba                       -1.552e+00  1.196e+00  -1.298 0.194354    
## V14.Dominican.Republic         -2.470e+01  3.907e+04  -0.001 0.999496    
## V14.Ecuador                    -2.819e+00  1.910e+00  -1.476 0.140023    
## V14.El.Salvador                -1.865e+00  1.325e+00  -1.408 0.159130    
## V14.England                    -9.328e-01  1.186e+00  -0.786 0.431585    
## V14.France                     -9.459e-01  1.274e+00  -0.742 0.457911    
## V14.Germany                    -7.957e-01  1.193e+00  -0.667 0.504693    
## V14.Greece                     -1.988e+00  1.363e+00  -1.458 0.144829    
## V14.Guatemala                  -1.478e+00  1.500e+00  -0.985 0.324396    
## V14.Haiti                      -2.243e+00  1.874e+00  -1.197 0.231472    
## V14.Holand.Netherlands         -2.367e+01  3.223e+05   0.000 0.999941    
## V14.Honduras                   -2.111e+00  2.839e+00  -0.744 0.457073    
## V14.Hong                       -6.762e-01  1.385e+00  -0.488 0.625273    
## V14.Hungary                    -3.321e-01  1.473e+00  -0.225 0.821696    
## V14.India                      -1.865e+00  1.197e+00  -1.559 0.119004    
## V14.Iran                       -2.005e+00  1.390e+00  -1.442 0.149222    
## V14.Ireland                    -1.381e+00  1.688e+00  -0.818 0.413343    
## V14.Italy                       3.534e-01  1.184e+00   0.299 0.765237    
## V14.Jamaica                    -2.563e+00  1.532e+00  -1.673 0.094333 .  
## V14.Japan                      -4.642e-01  1.286e+00  -0.361 0.718023    
## V14.Laos                       -7.318e-01  1.516e+00  -0.483 0.629310    
## V14.Mexico                     -1.762e+00  1.147e+00  -1.537 0.124377    
## V14.Nicaragua                  -1.886e+00  1.394e+00  -1.353 0.176200    
## V14.Outlying.US.Guam.USVI.etc. -2.592e+01  1.244e+05   0.000 0.999834    
## V14.Peru                       -1.528e+00  1.583e+00  -0.965 0.334443    
## V14.Philippines                -1.053e+00  1.180e+00  -0.892 0.372499    
## V14.Poland                     -1.188e+00  1.286e+00  -0.924 0.355727    
## V14.Portugal                   -2.408e+00  1.686e+00  -1.429 0.153119    
## V14.Puerto.Rico                -6.206e-01  1.236e+00  -0.502 0.615675    
## V14.Scotland                   -4.862e-01  1.548e+00  -0.314 0.753535    
## V14.South                      -3.051e+00  1.306e+00  -2.336 0.019482 *  
## V14.Taiwan                     -7.284e-01  1.290e+00  -0.565 0.572285    
## V14.Thailand                   -1.994e+00  1.784e+00  -1.118 0.263509    
## V14.Trinadad.Tobago            -1.246e+00  1.675e+00  -0.744 0.456827    
## V14.United.States              -1.074e+00  1.120e+00  -0.959 0.337576    
## V14.Vietnam                    -1.899e+00  1.415e+00  -1.343 0.179355    
## V14.Yugoslavia                         NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 17488.4  on 15259  degrees of freedom
## Residual deviance:  9509.3  on 15160  degrees of freedom
## AIC: 9709.3
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the KDE node-1 fit showing up to 50 features;
# the count is passed by name and the "Assited" typo in the title is corrected.
vip(Adult_TDA_KDE_5.60.5_n1_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.60.5_n1_Lr1Fit TDA-Assisted LR")

# Score the held-out rows in adult.one_hot_df4Test with the KDE node-1 model.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n1_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Cross-tabulate the predictions against the observed test labels.
# NOTE(review): the printed result reports ' <=50K' as the positive class, so
# the per-class metrics describe that class -- confirm this is intended.
ad_tda_kde_5.60.5_n1_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n1_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6970  1001
##      >50K     446  1351
##                                           
##                Accuracy : 0.8519          
##                  95% CI : (0.8447, 0.8589)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5593          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9399          
##             Specificity : 0.5744          
##          Pos Pred Value : 0.8744          
##          Neg Pred Value : 0.7518          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7136          
##    Detection Prevalence : 0.8160          
##       Balanced Accuracy : 0.7571          
##                                           
##        'Positive' Class :  <=50K          
## 
# Prints the same confusion matrix a second time; the object has not been
# modified since the previous print.
ad_tda_kde_5.60.5_n1_lr_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6970  1001
##      >50K     446  1351
##                                           
##                Accuracy : 0.8519          
##                  95% CI : (0.8447, 0.8589)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5593          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9399          
##             Specificity : 0.5744          
##          Pos Pred Value : 0.8744          
##          Neg Pred Value : 0.7518          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7136          
##    Detection Prevalence : 0.8160          
##       Balanced Accuracy : 0.7571          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics of the KDE node-1 confusion matrix.
ad_tda_kde_5.60.5_n1_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.518632e-01   5.593270e-01   8.446625e-01   8.588534e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  3.746356e-113   4.771065e-48
# Keep the accuracy only, selected by name instead of position 1 so the
# extraction does not depend on the ordering of $overall.
ad_tda_kde_5.60.5_n1_lr_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n1_lr_cf0$overall["Accuracy"]
ad_tda_kde_5.60.5_n1_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9398598            0.5744048            0.8744198 
##       Neg Pred Value            Precision               Recall 
##            0.7518086            0.8744198            0.9398598 
##                   F1           Prevalence       Detection Rate 
##            0.9059596            0.7592138            0.7135545 
## Detection Prevalence    Balanced Accuracy 
##            0.8160319            0.7571323
# Precision, recall and F1, picked by name (positions 5:7 of the vector
# printed above) so a reordering of $byClass cannot silently swap metrics.
ad_tda_kde_5.60.5_n1_lr_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n1_lr_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers

### 3-fold diff

# Fold-wise accuracy gap: ad_lr_fit_re (presumably the non-TDA LR resample
# accuracies) minus the KDE node-1 TDA-assisted LR accuracies. Negative values
# mean the KDE node-1 fit scored higher on that fold.
diff_tda_kde_5.60.5_lr_n1_3_fold <-
  ad_lr_fit_re - ad_tda_kde_5.60.5_n1_lr_fit_re
diff_tda_kde_5.60.5_lr_n1_3_fold
##       Accuracy
## 1 -0.009748357
## 2 -0.009418882
## 3 -0.001926598
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

# -0.01 and 0.01 are presumably the lower/upper ROPE limits (the result
# carries a probRope component) -- confirm against BayesianSignTest().
bst_tda_kde_5.60.5_lr.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_lr_n1_3_fold), -0.01, 0.01
)
bst_tda_kde_5.60.5_lr.n1_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the sign test.
# NOTE(review): both probabilities are 0 here, so the ratio is 0/0 = NaN (as
# recorded below); guard before using it downstream.
bst_tda_kde_5.60.5_lr.n1_3_fold_odds.left <- with(
  bst_tda_kde_5.60.5_lr.n1_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.60.5_lr.n1_3_fold_odds.left
## [1] NaN
# Bayesian Signed Rank Test

# Same fold differences and the same -0.01 / 0.01 limits as the sign test.
bsr_tda_kde_5.60.5_lr.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_lr_n1_3_fold), -0.01, 0.01
)
bsr_tda_kde_5.60.5_lr.n1_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# NOTE(review): 0.1 is presumably the between-fold correlation argument; for
# 3-fold CV the usual heuristic is 1/3 (test share of the data) -- TODO confirm.
bct_tda_kde_5.60.5_lr.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_lr_n1_3_fold), 0.1, -0.01, 0.01
)
bct_tda_kde_5.60.5_lr.n1_3_fold
## $left
## [1] 0.2100575
## 
## $rope
## [1] 0.7755917
## 
## $right
## [1] 0.01435083
# Rope Plot
plot(rope(diff_tda_kde_5.60.5_lr_n1_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.60.5_lr.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_lr_n1_3_fold))
#bf_tda_kde_5.60.5_lr.n1_3_fold

#t_test
# One-sample t-test of the fold differences (the recorded output shows a
# two-sided test against a true mean of 0).
t.test(
  as.matrix(diff_tda_kde_5.60.5_lr_n1_3_fold)
)
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.60.5_lr_n1_3_fold)
## t = -2.7529, df = 2, p-value = 0.1105
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.018020736  0.003958178
## sample estimates:
##    mean of x 
## -0.007031279
### Test set diff
# Test-set accuracy difference: lr_cf_ov_acc minus the node-1 TDA-assisted LR
# accuracy. This is a single value, so the tests below run on one observation.
diff_tda_kde_5.60.5_lr.n1_test <-
  lr_cf_ov_acc - ad_tda_kde_5.60.5_n1_lr_cf0_ov_acc
diff_tda_kde_5.60.5_lr.n1_test
##    Accuracy 
## 0.001433251
## Bayesian Tests Test set diff

# Bayesian Sign Test

# Sign test on the single test-set difference, with -0.01 / 0.01 as bounds.
bst_tda_kde_5.60.5_lr.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_lr.n1_test),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_lr.n1_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test

# Left / right probability ratio; 0 / 0 gives NaN when both are 0.
bst_tda_kde_5.60.5_lr.n1_test_odds.left <-
  bst_tda_kde_5.60.5_lr.n1_test[["probLeft"]] /
  bst_tda_kde_5.60.5_lr.n1_test[["probRight"]]
bst_tda_kde_5.60.5_lr.n1_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test

# Signed-rank test on the single test-set difference.
bsr_tda_kde_5.60.5_lr.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_lr.n1_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_lr.n1_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

# Correlated t-test on the single test-set difference. The recorded output is
# NA for left / rope / right (presumably because one observation gives no
# variance estimate).
bct_tda_kde_5.60.5_lr.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_lr.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_lr.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_lr.n1_test)))

#BayesFactor
#bf_tda_kde_5.60.5_lr.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_lr.n1_test)) #bf_tda_pca_5.60.5_lr.n1_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_lr.n1_test))


##With TDA KDE filter 5 intervals, 60% overlap, 5 bins
##Node2

# Fit a binomial GLM (logistic regression) on the node-2 TDA/KDE training
# subset through train(), using the resampling scheme in fitControl and
# Accuracy as the selection metric. The recorded warnings below show that
# glm.fit did not converge and that the fit is rank-deficient.
Adult_TDA_KDE_5.60.5_n2_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n2.vec,
  family = "binomial",
  method = "glm",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the fitted node-2 model: sample count, resampling scheme and the
# cross-validated Accuracy / Kappa.
Adult_TDA_KDE_5.60.5_n2_LrFit0
## Generalized Linear Model 
## 
## 14482 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 9654, 9655, 9655 
## Resampling results:
## 
##   Accuracy  Kappa    
##   0.837868  0.5866575
# Per-fold resampling results of the node-2 model.
Adult_TDA_KDE_5.60.5_n2_LrFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8340928 0.5799871    Fold1
## 2 0.8402735 0.5877605    Fold2
## 3 0.8392376 0.5922249    Fold3
# Keep only the Accuracy column (the first column above) as a one-column data
# frame; it feeds the 3-fold comparison further down.
ad_tda_kde_5.60.5_n2_lr_fit_re <-
  Adult_TDA_KDE_5.60.5_n2_LrFit0$resample["Accuracy"]

# Coefficient table and deviance summary of the node-2 model.
summary(Adult_TDA_KDE_5.60.5_n2_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (15 not defined because of singularities)
##                                  Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)                    -1.309e+13  9.862e+12   -1.327 0.184466    
## V1                              4.049e-02  2.794e-03   14.493  < 2e-16 ***
## V2..                            1.309e+13  9.862e+12    1.327 0.184466    
## V2.Federal.gov                  1.309e+13  9.862e+12    1.327 0.184466    
## V2.Local.gov                    1.309e+13  9.862e+12    1.327 0.184466    
## V2.Never.worked                -4.491e+15  9.862e+12 -455.349  < 2e-16 ***
## V2.Private                      1.309e+13  9.862e+12    1.327 0.184466    
## V2.Self.emp.inc                 1.309e+13  9.862e+12    1.327 0.184466    
## V2.Self.emp.not.inc             1.309e+13  9.862e+12    1.327 0.184466    
## V2.State.gov                    1.309e+13  9.862e+12    1.327 0.184466    
## V2.Without.pay                  1.309e+13  9.862e+12    1.327 0.184466    
## V3                              1.108e-06  2.829e-07    3.917 8.96e-05 ***
## V4.10th                        -1.102e+00  4.439e-01   -2.482 0.013066 *  
## V4.11th                        -8.364e-01  1.988e-01   -4.208 2.58e-05 ***
## V4.12th                        -7.124e-01  2.927e-01   -2.434 0.014943 *  
## V4.1st.4th                             NA         NA       NA       NA    
## V4.5th.6th                             NA         NA       NA       NA    
## V4.7th.8th                             NA         NA       NA       NA    
## V4.9th                                 NA         NA       NA       NA    
## V4.Assoc.acdm                   2.085e-01  1.279e-01    1.630 0.103122    
## V4.Assoc.voc                    1.955e-01  1.314e-01    1.489 0.136600    
## V4.Bachelors                    9.590e-01  8.476e-02   11.314  < 2e-16 ***
## V4.Doctorate                           NA         NA       NA       NA    
## V4.HS.grad                     -4.015e-01  8.285e-02   -4.845 1.26e-06 ***
## V4.Masters                      1.363e+00  1.086e-01   12.547  < 2e-16 ***
## V4.Preschool                           NA         NA       NA       NA    
## V4.Prof.school                  2.391e+00  2.008e-01   11.908  < 2e-16 ***
## V4.Some.college                        NA         NA       NA       NA    
## V5                                     NA         NA       NA       NA    
## V6.Divorced                    -1.674e-02  2.200e-01   -0.076 0.939333    
## V6.Married.AF.spouse            2.853e+00  8.913e-01    3.201 0.001369 ** 
## V6.Married.civ.spouse           1.809e+00  4.933e-01    3.668 0.000245 ***
## V6.Married.spouse.absent        1.727e-02  3.627e-01    0.048 0.962025    
## V6.Never.married               -5.343e-01  2.302e-01   -2.321 0.020284 *  
## V6.Separated                   -2.855e-01  3.026e-01   -0.943 0.345537    
## V6.Widowed                             NA         NA       NA       NA    
## V7..                                   NA         NA       NA       NA    
## V7.Adm.clerical                -1.043e-02  1.524e-01   -0.068 0.945425    
## V7.Armed.Forces                -2.422e+01  2.622e+05    0.000 0.999926    
## V7.Craft.repair                 1.091e-01  1.350e-01    0.808 0.419225    
## V7.Exec.managerial              8.528e-01  1.345e-01    6.338 2.32e-10 ***
## V7.Farming.fishing             -7.283e-01  2.070e-01   -3.518 0.000435 ***
## V7.Handlers.cleaners           -5.735e-01  2.418e-01   -2.371 0.017723 *  
## V7.Machine.op.inspct           -3.798e-01  1.762e-01   -2.155 0.031131 *  
## V7.Other.service               -9.053e-01  1.939e-01   -4.670 3.01e-06 ***
## V7.Priv.house.serv             -3.426e+00  2.418e+00   -1.417 0.156563    
## V7.Prof.specialty               4.991e-01  1.408e-01    3.544 0.000394 ***
## V7.Protective.serv              6.908e-01  2.001e-01    3.452 0.000556 ***
## V7.Sales                        3.437e-01  1.392e-01    2.469 0.013534 *  
## V7.Tech.support                 5.862e-01  1.782e-01    3.290 0.001001 ** 
## V7.Transport.moving                    NA         NA       NA       NA    
## V8.Husband                     -1.444e+00  1.402e-01  -10.303  < 2e-16 ***
## V8.Not.in.family               -1.176e+00  4.607e-01   -2.553 0.010681 *  
## V8.Other.relative              -1.698e+00  3.971e-01   -4.277 1.89e-05 ***
## V8.Own.child                   -2.339e+00  4.729e-01   -4.946 7.57e-07 ***
## V8.Unmarried                   -1.266e+00  4.708e-01   -2.690 0.007150 ** 
## V8.Wife                                NA         NA       NA       NA    
## V9.Amer.Indian.Eskimo          -8.523e-02  3.050e-01   -0.279 0.779895    
## V9.Asian.Pac.Islander          -3.720e-02  2.165e-01   -0.172 0.863567    
## V9.Black                       -1.473e-02  1.112e-01   -0.132 0.894606    
## V9.Other                        4.444e-02  3.804e-01    0.117 0.907010    
## V9.White                               NA         NA       NA       NA    
## V10.Female                     -8.839e-01  1.061e-01   -8.327  < 2e-16 ***
## V10.Male                               NA         NA       NA       NA    
## V11                             3.255e-04  1.496e-05   21.755  < 2e-16 ***
## V12                             7.599e-04  5.598e-05   13.575  < 2e-16 ***
## V13                             2.552e-02  2.465e-03   10.353  < 2e-16 ***
## V14..                          -4.037e-01  8.580e-01   -0.470 0.638033    
## V14.Cambodia                    1.943e+00  1.262e+00    1.540 0.123630    
## V14.Canada                     -1.803e-02  9.283e-01   -0.019 0.984507    
## V14.China                      -4.556e-01  1.005e+00   -0.453 0.650328    
## V14.Columbia                   -2.457e+01  5.482e+04    0.000 0.999642    
## V14.Cuba                        3.746e-01  9.599e-01    0.390 0.696328    
## V14.Dominican.Republic         -2.402e+01  6.168e+04    0.000 0.999689    
## V14.Ecuador                    -1.364e+00  1.496e+00   -0.912 0.361966    
## V14.El.Salvador                -1.258e-01  1.058e+00   -0.119 0.905377    
## V14.England                     6.551e-01  9.257e-01    0.708 0.479102    
## V14.France                      8.094e-01  1.158e+00    0.699 0.484717    
## V14.Germany                     7.270e-01  9.023e-01    0.806 0.420428    
## V14.Greece                     -1.297e+00  1.148e+00   -1.130 0.258643    
## V14.Guatemala                  -6.867e-01  2.179e+00   -0.315 0.752665    
## V14.Haiti                      -3.621e-01  1.177e+00   -0.308 0.758395    
## V14.Holand.Netherlands         -2.246e+01  3.276e+05    0.000 0.999945    
## V14.Honduras                   -2.252e+01  1.135e+05    0.000 0.999842    
## V14.Hong                        1.802e+00  1.315e+00    1.370 0.170542    
## V14.Hungary                     6.030e-01  1.429e+00    0.422 0.673095    
## V14.India                      -9.643e-02  9.128e-01   -0.106 0.915862    
## V14.Iran                        1.989e-01  1.127e+00    0.176 0.859909    
## V14.Ireland                    -2.388e+01  1.224e+05    0.000 0.999844    
## V14.Italy                       1.144e+00  9.684e-01    1.181 0.237472    
## V14.Jamaica                    -1.196e+00  1.151e+00   -1.039 0.298678    
## V14.Japan                       6.682e-01  1.011e+00    0.661 0.508751    
## V14.Laos                       -2.443e+01  1.052e+05    0.000 0.999815    
## V14.Mexico                     -5.701e-01  9.323e-01   -0.611 0.540877    
## V14.Nicaragua                  -4.672e-01  1.388e+00   -0.337 0.736460    
## V14.Outlying.US.Guam.USVI.etc. -2.408e+01  1.845e+05    0.000 0.999896    
## V14.Peru                       -4.700e-02  1.286e+00   -0.037 0.970837    
## V14.Philippines                 6.400e-01  9.024e-01    0.709 0.478163    
## V14.Poland                      6.504e-01  1.012e+00    0.642 0.520630    
## V14.Portugal                    8.490e-01  1.435e+00    0.592 0.554169    
## V14.Puerto.Rico                -7.499e-01  1.061e+00   -0.707 0.479752    
## V14.Scotland                    1.339e-01  1.370e+00    0.098 0.922146    
## V14.South                      -8.075e-01  1.004e+00   -0.804 0.421444    
## V14.Taiwan                      8.773e-02  1.025e+00    0.086 0.931796    
## V14.Thailand                   -8.249e-01  1.465e+00   -0.563 0.573362    
## V14.Trinadad.Tobago            -1.844e-01  1.740e+00   -0.106 0.915585    
## V14.United.States               2.699e-01  8.346e-01    0.323 0.746386    
## V14.Vietnam                    -1.080e+00  1.220e+00   -0.885 0.376068    
## V14.Yugoslavia                         NA         NA       NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 17221.4  on 14481  degrees of freedom
## Residual deviance:  9708.1  on 14388  degrees of freedom
## AIC: 9896.1
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot of the node-2 model (50 is passed as the second
# vip() argument), with a descriptive title.
vip(Adult_TDA_KDE_5.60.5_n2_LrFit0, 50) +
  ggtitle("Adult_TDA_KDE_5.60.5_n2_Lr1Fit TDA-Assited LR")

# Predict the test-set outcome with the node-2 model. pred0 is a shared name
# that the other node sections assign to as well.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n2_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the node-2 predictions against the test-set labels,
# used to assess model performance on the test data.
ad_tda_kde_5.60.5_n2_lr_cf0 <- confusionMatrix(
  data = pred0,
  as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n2_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6902   992
##      >50K     514  1360
##                                           
##                Accuracy : 0.8458          
##                  95% CI : (0.8385, 0.8529)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5469          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9307          
##             Specificity : 0.5782          
##          Pos Pred Value : 0.8743          
##          Neg Pred Value : 0.7257          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7066          
##    Detection Prevalence : 0.8081          
##       Balanced Accuracy : 0.7545          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): second print of the same confusion matrix; the recorded output
# repeats the one printed just above.
ad_tda_kde_5.60.5_n2_lr_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6902   992
##      >50K     514  1360
##                                           
##                Accuracy : 0.8458          
##                  95% CI : (0.8385, 0.8529)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5469          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9307          
##             Specificity : 0.5782          
##          Pos Pred Value : 0.8743          
##          Neg Pred Value : 0.7257          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7066          
##    Detection Prevalence : 0.8081          
##       Balanced Accuracy : 0.7545          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the node-2 confusion matrix.
ad_tda_kde_5.60.5_n2_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.458231e-01   5.468678e-01   8.385076e-01   8.529317e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   2.418652e-98   1.005859e-34
# Keep the Accuracy entry (the first element above), selected by name.
ad_tda_kde_5.60.5_n2_lr_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n2_lr_cf0$overall["Accuracy"]
# Per-class statistics of the node-2 confusion matrix.
ad_tda_kde_5.60.5_n2_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9306904            0.5782313            0.8743349 
##       Neg Pred Value            Precision               Recall 
##            0.7257204            0.8743349            0.9306904 
##                   F1           Prevalence       Detection Rate 
##            0.9016329            0.7592138            0.7065930 
## Detection Prevalence    Balanced Accuracy 
##            0.8081491            0.7544608
# Keep precision, recall and F1 (elements 5 to 7 of `byClass` in the recorded
# output above), selected by name so the extraction does not rely on position.
ad_tda_kde_5.60.5_n2_lr_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n2_lr_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-TDA-assisted LR vs. TDA-assisted LR classifiers

### 3-fold diff

# Per-fold accuracy difference: ad_lr_fit_re minus the node-2 TDA-assisted LR
# fold accuracies, so positive values favour ad_lr_fit_re
# (presumably the non-TDA baseline LR; confirm where it is defined).
diff_tda_kde_5.60.5_lr_n2_3_fold <-
  ad_lr_fit_re - ad_tda_kde_5.60.5_n2_lr_fit_re
diff_tda_kde_5.60.5_lr_n2_3_fold
##      Accuracy
## 1 0.015604497
## 2 0.007956103
## 3 0.016119051
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

# Sign test on the three fold differences. -0.01 and 0.01 are passed as the
# two bound arguments (presumably the ROPE limits; confirm against the
# BayesianSignTest definition).
bst_tda_kde_5.60.5_lr.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_lr_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_lr.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test

# Ratio of the left to the right probability of the sign test. It would be
# NaN (0 / 0) if both probabilities were 0.
bst_tda_kde_5.60.5_lr.n2_3_fold_odds.left <-
  bst_tda_kde_5.60.5_lr.n2_3_fold[["probLeft"]] /
  bst_tda_kde_5.60.5_lr.n2_3_fold[["probRight"]]
bst_tda_kde_5.60.5_lr.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

# Signed-rank test on the fold differences, with the same -0.01 / 0.01 bounds
# as the sign test.
bsr_tda_kde_5.60.5_lr.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_lr_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_lr.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.3069333
## 
## $winRight
## [1] 0.6930667
# Bayesian Correlated Test

# Correlated t-test on the fold differences. 0.1 is the second argument
# (presumably the assumed between-fold correlation; confirm against the
# correlatedBayesianTtest definition), followed by the -0.01 / 0.01 bounds.
bct_tda_kde_5.60.5_lr.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_lr_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_lr.n2_3_fold
## $left
## [1] 0.008392836
## 
## $rope
## [1] 0.1919669
## 
## $right
## [1] 0.7996403
# Rope Plot
# Plot the rope() result for the fold differences over the interval
# c(-0.01, 0.01).
plot(
  rope(diff_tda_kde_5.60.5_lr_n2_3_fold, c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.60.5_lr.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_lr_n2_3_fold))
#bf_tda_kde_5.60.5_lr.n2_3_fold

#t_test
# One-sample t-test of the fold differences (the recorded output shows a
# two-sided test against a true mean of 0).
t.test(
  as.matrix(diff_tda_kde_5.60.5_lr_n2_3_fold)
)
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.60.5_lr_n2_3_fold)
## t = 5.0112, df = 2, p-value = 0.03759
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.001870099 0.024583001
## sample estimates:
##  mean of x 
## 0.01322655
### Test set diff
# Test-set accuracy difference: lr_cf_ov_acc minus the node-2 TDA-assisted LR
# accuracy. This is a single value, so the tests below run on one observation.
diff_tda_kde_5.60.5_lr.n2_test <-
  lr_cf_ov_acc - ad_tda_kde_5.60.5_n2_lr_cf0_ov_acc
diff_tda_kde_5.60.5_lr.n2_test
##    Accuracy 
## 0.007473382
## Bayesian Tests Test set diff

# Bayesian Sign Test

# Sign test on the single test-set difference, with -0.01 / 0.01 as bounds.
bst_tda_kde_5.60.5_lr.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_lr.n2_test),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_lr.n2_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test

# Left / right probability ratio; 0 / 0 gives NaN when both are 0.
bst_tda_kde_5.60.5_lr.n2_test_odds.left <-
  bst_tda_kde_5.60.5_lr.n2_test[["probLeft"]] /
  bst_tda_kde_5.60.5_lr.n2_test[["probRight"]]
bst_tda_kde_5.60.5_lr.n2_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test

# Signed-rank test on the single test-set difference.
bsr_tda_kde_5.60.5_lr.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_lr.n2_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_lr.n2_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

# Correlated t-test on the single test-set difference. The recorded output is
# NA for left / rope / right (presumably because one observation gives no
# variance estimate).
bct_tda_kde_5.60.5_lr.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_lr.n2_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_lr.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_lr.n2_test)))

#BayesFactor
#bf_tda_kde_5.60.5_lr.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_lr.n2_test)) #bf_tda_pca_5.60.5_lr.n2_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_lr.n2_test))

##Node3

# Fit a binomial GLM (logistic regression) on the node-3 TDA/KDE training
# subset through train(), using the resampling scheme in fitControl and
# Accuracy as the selection metric. The recorded warnings below show that
# glm.fit did not converge and that the fit is rank-deficient.
Adult_TDA_KDE_5.60.5_n3_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n3.vec,
  family = "binomial",
  method = "glm",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the fitted node-3 model: sample count, resampling scheme and the
# cross-validated Accuracy / Kappa.
Adult_TDA_KDE_5.60.5_n3_LrFit0
## Generalized Linear Model 
## 
## 13266 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8844, 8844, 8844 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.8340118  0.5665226
# Per-fold resampling results of the node-3 model.
Adult_TDA_KDE_5.60.5_n3_LrFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8335595 0.5693901    Fold1
## 2 0.8367255 0.5687698    Fold2
## 3 0.8317503 0.5614079    Fold3
# Keep only the Accuracy column (first column above) as a one-column data
# frame for the node-3 3-fold comparison.
# Fix: this previously read Adult_TDA_KDE_5.60.5_n2_LrFit0$resample (a
# copy/paste from the node-2 section), so every node-3 3-fold statistic was
# computed on the node-2 fold accuracies. The recorded 3-fold output further
# down in this node-3 section predates the fix and must be regenerated.
ad_tda_kde_5.60.5_n3_lr_fit_re <- Adult_TDA_KDE_5.60.5_n3_LrFit0$resample[1]

# Coefficient table and deviance summary of the node-3 model.
summary(Adult_TDA_KDE_5.60.5_n3_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (17 not defined because of singularities)
##                                  Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)                    -3.534e+12  5.012e+12   -0.705 0.480792    
## V1                              5.465e-02  3.383e-03   16.154  < 2e-16 ***
## V2..                            3.534e+12  5.012e+12    0.705 0.480792    
## V2.Federal.gov                  3.534e+12  5.012e+12    0.705 0.480792    
## V2.Local.gov                    3.534e+12  5.012e+12    0.705 0.480792    
## V2.Never.worked                -4.500e+15  5.012e+12 -897.844  < 2e-16 ***
## V2.Private                      3.534e+12  5.012e+12    0.705 0.480792    
## V2.Self.emp.inc                 3.534e+12  5.012e+12    0.705 0.480792    
## V2.Self.emp.not.inc             3.534e+12  5.012e+12    0.705 0.480792    
## V2.State.gov                    3.534e+12  5.012e+12    0.705 0.480792    
## V2.Without.pay                  3.534e+12  5.012e+12    0.705 0.480792    
## V3                              1.200e-06  3.901e-07    3.077 0.002088 ** 
## V4.10th                                NA         NA       NA       NA    
## V4.11th                        -8.826e-01  7.509e-01   -1.175 0.239880    
## V4.12th                        -4.441e-01  3.219e-01   -1.379 0.167752    
## V4.1st.4th                             NA         NA       NA       NA    
## V4.5th.6th                             NA         NA       NA       NA    
## V4.7th.8th                             NA         NA       NA       NA    
## V4.9th                                 NA         NA       NA       NA    
## V4.Assoc.acdm                   4.222e-01  1.314e-01    3.213 0.001313 ** 
## V4.Assoc.voc                    3.291e-01  1.237e-01    2.661 0.007789 ** 
## V4.Bachelors                    1.100e+00  8.627e-02   12.749  < 2e-16 ***
## V4.Doctorate                           NA         NA       NA       NA    
## V4.HS.grad                     -2.741e-01  8.024e-02   -3.417 0.000634 ***
## V4.Masters                      1.455e+00  1.263e-01   11.527  < 2e-16 ***
## V4.Preschool                           NA         NA       NA       NA    
## V4.Prof.school                         NA         NA       NA       NA    
## V4.Some.college                        NA         NA       NA       NA    
## V5                                     NA         NA       NA       NA    
## V6.Divorced                    -6.112e-02  2.879e-01   -0.212 0.831902    
## V6.Married.AF.spouse            3.036e+00  9.005e-01    3.371 0.000748 ***
## V6.Married.civ.spouse           2.160e+00  5.014e-01    4.309 1.64e-05 ***
## V6.Married.spouse.absent        6.195e-02  4.133e-01    0.150 0.880844    
## V6.Never.married               -4.394e-01  2.979e-01   -1.475 0.140283    
## V6.Separated                   -2.142e-01  3.630e-01   -0.590 0.555187    
## V6.Widowed                             NA         NA       NA       NA    
## V7..                                   NA         NA       NA       NA    
## V7.Adm.clerical                 2.647e-01  1.526e-01    1.735 0.082706 .  
## V7.Armed.Forces                -2.407e+01  1.761e+05    0.000 0.999891    
## V7.Craft.repair                 2.148e-01  1.353e-01    1.588 0.112367    
## V7.Exec.managerial              1.024e+00  1.372e-01    7.467 8.21e-14 ***
## V7.Farming.fishing             -8.214e-01  2.291e-01   -3.585 0.000337 ***
## V7.Handlers.cleaners           -5.135e-01  2.399e-01   -2.140 0.032333 *  
## V7.Machine.op.inspct           -4.451e-02  1.664e-01   -0.267 0.789102    
## V7.Other.service               -6.582e-01  1.975e-01   -3.333 0.000858 ***
## V7.Priv.house.serv             -3.444e+00  2.177e+00   -1.582 0.113700    
## V7.Prof.specialty               6.800e-01  1.452e-01    4.684 2.81e-06 ***
## V7.Protective.serv              9.109e-01  1.994e-01    4.568 4.93e-06 ***
## V7.Sales                        5.680e-01  1.410e-01    4.029 5.61e-05 ***
## V7.Tech.support                 7.595e-01  1.784e-01    4.258 2.06e-05 ***
## V7.Transport.moving                    NA         NA       NA       NA    
## V8.Husband                     -1.475e+00  1.509e-01   -9.770  < 2e-16 ***
## V8.Not.in.family               -9.954e-01  4.341e-01   -2.293 0.021837 *  
## V8.Other.relative              -2.005e+00  3.790e-01   -5.289 1.23e-07 ***
## V8.Own.child                   -2.293e+00  4.219e-01   -5.434 5.50e-08 ***
## V8.Unmarried                   -1.206e+00  4.459e-01   -2.705 0.006829 ** 
## V8.Wife                                NA         NA       NA       NA    
## V9.Amer.Indian.Eskimo          -5.655e-01  3.482e-01   -1.624 0.104354    
## V9.Asian.Pac.Islander           4.351e-02  2.155e-01    0.202 0.840000    
## V9.Black                       -2.071e-01  1.167e-01   -1.775 0.075929 .  
## V9.Other                       -4.616e-01  4.228e-01   -1.092 0.274958    
## V9.White                               NA         NA       NA       NA    
## V10.Female                     -8.109e-01  1.188e-01   -6.827 8.69e-12 ***
## V10.Male                               NA         NA       NA       NA    
## V11                             3.183e-04  1.567e-05   20.313  < 2e-16 ***
## V12                             6.529e-04  5.952e-05   10.968  < 2e-16 ***
## V13                             2.541e-02  2.712e-03    9.368  < 2e-16 ***
## V14..                          -5.368e-01  8.526e-01   -0.630 0.528925    
## V14.Cambodia                    1.519e+00  1.192e+00    1.275 0.202433    
## V14.Canada                      2.187e-01  9.252e-01    0.236 0.813088    
## V14.China                      -9.917e-01  1.050e+00   -0.944 0.345091    
## V14.Columbia                   -2.494e+01  5.303e+04    0.000 0.999625    
## V14.Cuba                        1.293e+00  9.650e-01    1.340 0.180400    
## V14.Dominican.Republic         -1.024e+00  1.383e+00   -0.740 0.459151    
## V14.Ecuador                    -1.294e+00  1.510e+00   -0.857 0.391678    
## V14.El.Salvador                -5.022e-01  1.118e+00   -0.449 0.653278    
## V14.England                     1.950e-01  9.425e-01    0.207 0.836124    
## V14.France                      1.378e+00  1.148e+00    1.200 0.230131    
## V14.Germany                     1.993e-01  9.055e-01    0.220 0.825787    
## V14.Greece                     -1.982e+00  1.190e+00   -1.666 0.095775 .  
## V14.Guatemala                  -2.422e+01  9.614e+04    0.000 0.999799    
## V14.Haiti                      -2.290e-01  1.130e+00   -0.203 0.839407    
## V14.Holand.Netherlands         -2.221e+01  3.422e+05    0.000 0.999948    
## V14.Honduras                   -2.323e+01  1.503e+05    0.000 0.999877    
## V14.Hong                        1.286e+00  2.679e+00    0.480 0.631290    
## V14.Hungary                    -9.409e-01  1.445e+00   -0.651 0.514860    
## V14.India                      -5.565e-01  9.309e-01   -0.598 0.549953    
## V14.Iran                        1.029e-01  9.853e-01    0.104 0.916841    
## V14.Ireland                     9.944e-01  1.227e+00    0.811 0.417632    
## V14.Italy                      -2.572e-01  9.760e-01   -0.263 0.792174    
## V14.Jamaica                    -2.657e-01  1.089e+00   -0.244 0.807172    
## V14.Japan                       2.288e-01  1.031e+00    0.222 0.824273    
## V14.Laos                       -2.443e+01  1.495e+05    0.000 0.999870    
## V14.Mexico                     -1.067e+00  9.894e-01   -1.078 0.280923    
## V14.Nicaragua                  -2.388e+01  1.133e+05    0.000 0.999832    
## V14.Outlying.US.Guam.USVI.etc. -2.340e+01  1.615e+05    0.000 0.999884    
## V14.Peru                       -1.338e+00  1.536e+00   -0.871 0.383728    
## V14.Philippines                 8.530e-01  9.043e-01    0.943 0.345533    
## V14.Poland                     -2.748e-01  1.008e+00   -0.273 0.785081    
## V14.Portugal                    3.493e-01  1.252e+00    0.279 0.780250    
## V14.Puerto.Rico                -1.476e+00  1.052e+00   -1.403 0.160758    
## V14.Scotland                   -7.664e-01  1.523e+00   -0.503 0.614822    
## V14.South                      -1.207e+00  1.033e+00   -1.169 0.242444    
## V14.Taiwan                     -5.648e-01  1.099e+00   -0.514 0.607329    
## V14.Thailand                   -1.059e+00  1.561e+00   -0.678 0.497672    
## V14.Trinadad.Tobago            -4.901e-01  1.660e+00   -0.295 0.767748    
## V14.United.States               1.300e-02  8.294e-01    0.016 0.987493    
## V14.Vietnam                    -2.663e+00  1.393e+00   -1.911 0.055981 .  
## V14.Yugoslavia                         NA         NA       NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 15481.3  on 13265  degrees of freedom
## Residual deviance:  8855.9  on 13174  degrees of freedom
## AIC: 9039.9
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot of the node-3 model (50 is passed as the second
# vip() argument), with a descriptive title.
vip(Adult_TDA_KDE_5.60.5_n3_LrFit0, 50) +
  ggtitle("Adult_TDA_KDE_5.60.5_n3_Lr1Fit TDA-Assited LR")

# Predict the test-set outcome with the node-3 model. pred0 is a shared name
# that the other node sections assign to as well.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n3_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the node-3 predictions against the test-set labels,
# used to assess model performance on the test data.
ad_tda_kde_5.60.5_n3_lr_cf0 <- confusionMatrix(
  data = pred0,
  as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n3_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6763   927
##      >50K     653  1425
##                                           
##                Accuracy : 0.8382          
##                  95% CI : (0.8308, 0.8455)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5393          
##                                           
##  Mcnemar's Test P-Value : 6.508e-12       
##                                           
##             Sensitivity : 0.9119          
##             Specificity : 0.6059          
##          Pos Pred Value : 0.8795          
##          Neg Pred Value : 0.6858          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6924          
##    Detection Prevalence : 0.7873          
##       Balanced Accuracy : 0.7589          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): second print of the same confusion matrix; the recorded output
# repeats the one printed just above.
ad_tda_kde_5.60.5_n3_lr_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6763   927
##      >50K     653  1425
##                                           
##                Accuracy : 0.8382          
##                  95% CI : (0.8308, 0.8455)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5393          
##                                           
##  Mcnemar's Test P-Value : 6.508e-12       
##                                           
##             Sensitivity : 0.9119          
##             Specificity : 0.6059          
##          Pos Pred Value : 0.8795          
##          Neg Pred Value : 0.6858          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6924          
##    Detection Prevalence : 0.7873          
##       Balanced Accuracy : 0.7589          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the node-3 confusion matrix.
ad_tda_kde_5.60.5_n3_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.382473e-01   5.392635e-01   8.307933e-01   8.454991e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.943266e-81   6.508068e-12
# Keep the Accuracy entry (the first element above), selected by name.
ad_tda_kde_5.60.5_n3_lr_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n3_lr_cf0$overall["Accuracy"]
# Per-class statistics of the node-3 confusion matrix.
ad_tda_kde_5.60.5_n3_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9119471            0.6058673            0.8794538 
##       Neg Pred Value            Precision               Recall 
##            0.6857555            0.8794538            0.9119471 
##                   F1           Prevalence       Detection Rate 
##            0.8954058            0.7592138            0.6923628 
## Detection Prevalence    Balanced Accuracy 
##            0.7872645            0.7589072
# Keep precision, recall and F1 (elements 5 to 7 of `byClass` in the recorded
# output above), selected by name so the extraction does not rely on position.
ad_tda_kde_5.60.5_n3_lr_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n3_lr_cf0$byClass[c("Precision", "Recall", "F1")]


###### Conduct initial Bayesian tests of non-TDA-assisted LR vs. TDA-assisted LR classifiers

### 3-fold diff

# Per-fold accuracy difference: ad_lr_fit_re minus the node-3 TDA-assisted LR
# fold accuracies, so positive values favour ad_lr_fit_re.
# NOTE(review): the recorded output below is identical to the node-2
# differences; verify that ad_tda_kde_5.60.5_n3_lr_fit_re is built from the
# node-3 fit.
diff_tda_kde_5.60.5_lr_n3_3_fold <-
  ad_lr_fit_re - ad_tda_kde_5.60.5_n3_lr_fit_re
diff_tda_kde_5.60.5_lr_n3_3_fold
##      Accuracy
## 1 0.015604497
## 2 0.007956103
## 3 0.016119051
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

# Sign test on the three fold differences. -0.01 and 0.01 are passed as the
# two bound arguments (presumably the ROPE limits; confirm against the
# BayesianSignTest definition).
bst_tda_kde_5.60.5_lr.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_lr_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_lr.n3_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test

# Ratio of the left to the right probability of the sign test. It would be
# NaN (0 / 0) if both probabilities were 0.
bst_tda_kde_5.60.5_lr.n3_3_fold_odds.left <-
  bst_tda_kde_5.60.5_lr.n3_3_fold[["probLeft"]] /
  bst_tda_kde_5.60.5_lr.n3_3_fold[["probRight"]]
bst_tda_kde_5.60.5_lr.n3_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

# Signed-rank test on the fold differences, with the same -0.01 / 0.01 bounds
# as the sign test.
bsr_tda_kde_5.60.5_lr.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_lr_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_lr.n3_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.3048667
## 
## $winRight
## [1] 0.6951333
# Bayesian Correlated Test

# Correlated t-test on the fold differences. 0.1 is the second argument
# (presumably the assumed between-fold correlation; confirm against the
# correlatedBayesianTtest definition), followed by the -0.01 / 0.01 bounds.
bct_tda_kde_5.60.5_lr.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_lr_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_lr.n3_3_fold
## $left
## [1] 0.008392836
## 
## $rope
## [1] 0.1919669
## 
## $right
## [1] 0.7996403
# Rope Plot
# Plot the rope() result for the fold differences over the interval
# c(-0.01, 0.01).
plot(
  rope(diff_tda_kde_5.60.5_lr_n3_3_fold, c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.60.5_lr.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_lr_n3_3_fold))
#bf_tda_kde_5.60.5_lr.n3_3_fold

#t_test
t.test(as.matrix(diff_tda_kde_5.60.5_lr_n3_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.60.5_lr_n3_3_fold)
## t = 5.0112, df = 2, p-value = 0.03759
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.001870099 0.024583001
## sample estimates:
##  mean of x 
## 0.01322655
### Test set diff
# Test-set accuracy gap: `lr_cf_ov_acc` minus the node-3 TDA-assisted LR
# accuracy. This is a single value, not one per fold.
diff_tda_kde_5.60.5_lr.n3_test <-
  lr_cf_ov_acc - ad_tda_kde_5.60.5_n3_lr_cf0_ov_acc
diff_tda_kde_5.60.5_lr.n3_test
##   Accuracy 
## 0.01504914

## Bayesian tests on the test-set difference

# Sign test
bst_tda_kde_5.60.5_lr.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_lr.n3_test), -0.01, 0.01
)
bst_tda_kde_5.60.5_lr.n3_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5

# Sign test: ratio of the "left" probability to the "right" probability
bst_tda_kde_5.60.5_lr.n3_test_odds.left <-
  bst_tda_kde_5.60.5_lr.n3_test[["probLeft"]] /
  bst_tda_kde_5.60.5_lr.n3_test[["probRight"]]
bst_tda_kde_5.60.5_lr.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

# Signed-rank test on the node-3 test-set accuracy gap.
# Fix: this line previously read `diff_tda_kde_5.60.5_lr.n2_test` and assigned
# `bsr_tda_kde_5.60.5_lr.n2_test` (copy-paste from the node-2 section), so the
# node-3 result was never stored and the node-2 object was overwritten.
bsr_tda_kde_5.60.5_lr.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_lr.n3_test), -0.01, 0.01
)
bsr_tda_kde_5.60.5_lr.n3_test
# NOTE: the output recorded below came from the node-2 input used before the
# fix and is stale; re-run this chunk to refresh it.
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

# Correlated t-test on the single test-set difference (leading argument 0.1,
# then bounds -0.01 / 0.01). The recorded result is NA in every region;
# presumably one observation leaves the variance undefined (TODO confirm).
bct_tda_kde_5.60.5_lr.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_lr.n3_test), 0.1, -0.01, 0.01
)
bct_tda_kde_5.60.5_lr.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_lr.n3_test))

#BayesFactor
#bf_tda_kde_5.60.5_lr.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_lr.n3_test)) #bf_tda_pca_5.60.5_lr.n3_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_lr.n3_test))

##Node4

# Fit a binomial GLM through caret on the node-4 data set, using the
# resampling scheme held in `fitControl` (defined earlier) and accuracy as the
# summary metric. The recorded warnings below report non-convergence and
# rank-deficient fits for this model.
Adult_TDA_KDE_5.60.5_n4_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n4.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the fitted caret model for node 4.
Adult_TDA_KDE_5.60.5_n4_LrFit0
## Generalized Linear Model 
## 
## 11795 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 7864, 7863, 7863 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.8459493  0.5208813

# Per-fold resampling results.
Adult_TDA_KDE_5.60.5_n4_LrFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8176037 0.4479074    Fold1
## 2 0.8596134 0.5612620    Fold2
## 3 0.8606307 0.5534746    Fold3

# Keep only the Accuracy column (still a one-column data frame) for the
# fold-wise comparison further down.
ad_tda_kde_5.60.5_n4_lr_fit_re <-
  Adult_TDA_KDE_5.60.5_n4_LrFit0$resample["Accuracy"]

# Coefficient table of the underlying GLM.
summary(Adult_TDA_KDE_5.60.5_n4_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (20 not defined because of singularities)
##                                  Estimate Std. Error    z value Pr(>|z|)    
## (Intercept)                    -7.825e+15  5.926e+07 -132041302   <2e-16 ***
## V1                              2.987e+13  8.580e+04  348131951   <2e-16 ***
## V2..                            1.530e+15  4.768e+07   32094105   <2e-16 ***
## V2.Federal.gov                  4.069e+15  4.767e+07   85357515   <2e-16 ***
## V2.Local.gov                    7.326e+14  4.760e+07   15390105   <2e-16 ***
## V2.Never.worked                 4.947e+15  6.720e+07   73620966   <2e-16 ***
## V2.Private                      8.540e+14  4.751e+07   17974386   <2e-16 ***
## V2.Self.emp.inc                 1.145e+15  4.767e+07   24026636   <2e-16 ***
## V2.Self.emp.not.inc             4.142e+14  4.758e+07    8704503   <2e-16 ***
## V2.State.gov                    2.119e+15  4.764e+07   44487685   <2e-16 ***
## V2.Without.pay                         NA         NA         NA       NA    
## V3                              1.027e+09  1.119e+01   91806324   <2e-16 ***
## V4.10th                                NA         NA         NA       NA    
## V4.11th                                NA         NA         NA       NA    
## V4.12th                        -5.304e+13  6.151e+06   -8623316   <2e-16 ***
## V4.1st.4th                             NA         NA         NA       NA    
## V4.5th.6th                             NA         NA         NA       NA    
## V4.7th.8th                             NA         NA         NA       NA    
## V4.9th                                 NA         NA         NA       NA    
## V4.Assoc.acdm                   1.087e+14  3.789e+06   28691297   <2e-16 ***
## V4.Assoc.voc                    6.702e+13  2.708e+06   24750048   <2e-16 ***
## V4.Bachelors                    6.943e+14  2.237e+06  310318700   <2e-16 ***
## V4.Doctorate                           NA         NA         NA       NA    
## V4.HS.grad                     -1.991e+14  1.530e+06 -130091327   <2e-16 ***
## V4.Masters                      3.434e+15  2.142e+07  160339901   <2e-16 ***
## V4.Preschool                           NA         NA         NA       NA    
## V4.Prof.school                         NA         NA         NA       NA    
## V4.Some.college                        NA         NA         NA       NA    
## V5                                     NA         NA         NA       NA    
## V6.Divorced                    -7.624e+13  6.365e+06  -11977925   <2e-16 ***
## V6.Married.AF.spouse            2.675e+15  2.231e+07  119919038   <2e-16 ***
## V6.Married.civ.spouse           1.645e+15  9.689e+06  169741764   <2e-16 ***
## V6.Married.spouse.absent        1.068e+14  8.483e+06   12596312   <2e-16 ***
## V6.Never.married                1.351e+14  6.436e+06   20987413   <2e-16 ***
## V6.Separated                    2.605e+13  6.978e+06    3732951   <2e-16 ***
## V6.Widowed                             NA         NA         NA       NA    
## V7..                                   NA         NA         NA       NA    
## V7.Adm.clerical                 2.690e+14  3.390e+06   79342700   <2e-16 ***
## V7.Armed.Forces                -3.599e+15  2.788e+07 -129072122   <2e-16 ***
## V7.Craft.repair                 1.812e+14  3.247e+06   55813993   <2e-16 ***
## V7.Exec.managerial              7.603e+14  3.440e+06  220985831   <2e-16 ***
## V7.Farming.fishing             -2.029e+14  5.077e+06  -39963607   <2e-16 ***
## V7.Handlers.cleaners           -5.289e+13  4.118e+06  -12843922   <2e-16 ***
## V7.Machine.op.inspct            7.162e+13  3.660e+06   19571511   <2e-16 ***
## V7.Other.service                1.813e+14  3.473e+06   52198068   <2e-16 ***
## V7.Priv.house.serv              6.831e+14  1.421e+07   48065932   <2e-16 ***
## V7.Prof.specialty               5.913e+14  3.744e+06  157965286   <2e-16 ***
## V7.Protective.serv              6.847e+14  5.256e+06  130270589   <2e-16 ***
## V7.Sales                        4.063e+14  3.371e+06  120523413   <2e-16 ***
## V7.Tech.support                 5.056e+14  4.468e+06  113159969   <2e-16 ***
## V7.Transport.moving                    NA         NA         NA       NA    
## V8.Husband                     -4.236e+14  3.440e+06 -123126104   <2e-16 ***
## V8.Not.in.family               -1.543e+14  8.026e+06  -19222478   <2e-16 ***
## V8.Other.relative              -3.840e+13  8.027e+06   -4783925   <2e-16 ***
## V8.Own.child                    1.478e+13  7.929e+06    1864194   <2e-16 ***
## V8.Unmarried                   -1.202e+14  8.146e+06  -14758509   <2e-16 ***
## V8.Wife                                NA         NA         NA       NA    
## V9.Amer.Indian.Eskimo          -2.778e+14  6.746e+06  -41188330   <2e-16 ***
## V9.Asian.Pac.Islander           8.827e+14  5.052e+06  174723046   <2e-16 ***
## V9.Black                       -7.761e+13  2.163e+06  -35881645   <2e-16 ***
## V9.Other                       -2.786e+14  7.369e+06  -37801271   <2e-16 ***
## V9.White                               NA         NA         NA       NA    
## V10.Female                     -9.368e+13  1.860e+06  -50364131   <2e-16 ***
## V10.Male                               NA         NA         NA       NA    
## V11                             4.594e+10  1.155e+02  397767698   <2e-16 ***
## V12                             3.150e+11  1.622e+03  194264192   <2e-16 ***
## V13                             1.047e+13  6.158e+04  169972735   <2e-16 ***
## V14..                           2.402e+15  3.390e+07   70845254   <2e-16 ***
## V14.Cambodia                    3.161e+15  4.235e+07   74644070   <2e-16 ***
## V14.Canada                      4.572e+15  3.562e+07  128352744   <2e-16 ***
## V14.China                       4.577e+14  3.714e+07   12323050   <2e-16 ***
## V14.Columbia                    9.928e+14  3.700e+07   26832292   <2e-16 ***
## V14.Cuba                        4.683e+15  3.609e+07  129752299   <2e-16 ***
## V14.Dominican.Republic          1.277e+15  3.699e+07   34517901   <2e-16 ***
## V14.Ecuador                     1.638e+15  3.942e+07   41543894   <2e-16 ***
## V14.El.Salvador                 2.134e+15  3.640e+07   58630774   <2e-16 ***
## V14.England                     2.967e+15  3.607e+07   82254355   <2e-16 ***
## V14.France                      2.320e+15  4.117e+07   56355878   <2e-16 ***
## V14.Germany                     3.456e+15  3.498e+07   98789297   <2e-16 ***
## V14.Greece                      8.869e+14  3.973e+07   22320780   <2e-16 ***
## V14.Guatemala                   3.833e+15  3.911e+07   98004056   <2e-16 ***
## V14.Haiti                       1.077e+15  3.739e+07   28812312   <2e-16 ***
## V14.Holand.Netherlands                 NA         NA         NA       NA    
## V14.Honduras                    5.376e+14  4.754e+07   11308063   <2e-16 ***
## V14.Hong                        7.819e+14  5.843e+07   13383227   <2e-16 ***
## V14.Hungary                    -9.436e+14  5.130e+07  -18393229   <2e-16 ***
## V14.India                       1.355e+15  3.689e+07   36743895   <2e-16 ***
## V14.Iran                        4.149e+15  3.715e+07  111687386   <2e-16 ***
## V14.Ireland                     4.518e+15  3.841e+07  117642930   <2e-16 ***
## V14.Italy                       3.109e+15  3.664e+07   84859846   <2e-16 ***
## V14.Jamaica                     3.019e+15  3.528e+07   85575023   <2e-16 ***
## V14.Japan                       2.437e+15  3.677e+07   66267437   <2e-16 ***
## V14.Laos                       -1.103e+15  4.368e+07  -25247146   <2e-16 ***
## V14.Mexico                      8.890e+14  3.432e+07   25899631   <2e-16 ***
## V14.Nicaragua                   4.595e+14  3.975e+07   11559935   <2e-16 ***
## V14.Outlying.US.Guam.USVI.etc.  5.357e+15  4.752e+07  112726438   <2e-16 ***
## V14.Peru                        1.346e+15  3.923e+07   34297473   <2e-16 ***
## V14.Philippines                 1.926e+15  3.503e+07   54988866   <2e-16 ***
## V14.Poland                      4.447e+14  3.610e+07   12318391   <2e-16 ***
## V14.Portugal                    2.814e+15  4.036e+07   69724643   <2e-16 ***
## V14.Puerto.Rico                 1.095e+15  3.516e+07   31153179   <2e-16 ***
## V14.Scotland                   -4.009e+14  4.752e+07   -8435028   <2e-16 ***
## V14.South                       3.990e+14  3.592e+07   11108460   <2e-16 ***
## V14.Taiwan                      4.044e+14  3.846e+07   10515566   <2e-16 ***
## V14.Thailand                    2.004e+15  4.068e+07   49253826   <2e-16 ***
## V14.Trinadad.Tobago             1.742e+12  3.980e+07      43764   <2e-16 ***
## V14.United.States               1.049e+15  3.359e+07   31225629   <2e-16 ***
## V14.Vietnam                     3.687e+15  3.567e+07  103368201   <2e-16 ***
## V14.Yugoslavia                         NA         NA         NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance:  12245  on 11794  degrees of freedom
## Residual deviance: 175821  on 11706  degrees of freedom
## AIC: 175999
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-4 fit, showing up to 50 features.
# The title typo "Assited" is corrected to "Assisted".
vip(Adult_TDA_KDE_5.60.5_n4_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.60.5_n4_Lr1Fit TDA-Assisted LR")

# Score the held-out test set with the node-4 model.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n4_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases

# Cross-tabulate predictions against the observed test labels.
ad_tda_kde_5.60.5_n4_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n4_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7143  1859
##      >50K     273   493
##                                           
##                Accuracy : 0.7817          
##                  95% CI : (0.7734, 0.7899)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 7.595e-08       
##                                           
##                   Kappa : 0.2245          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9632          
##             Specificity : 0.2096          
##          Pos Pred Value : 0.7935          
##          Neg Pred Value : 0.6436          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7313          
##    Detection Prevalence : 0.9216          
##       Balanced Accuracy : 0.5864          
##                                           
##        'Positive' Class :  <=50K          
## 
# Prints the node-4 confusion matrix a second time; the recorded output that
# follows is identical to the print directly above.
ad_tda_kde_5.60.5_n4_lr_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7143  1859
##      >50K     273   493
##                                           
##                Accuracy : 0.7817          
##                  95% CI : (0.7734, 0.7899)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 7.595e-08       
##                                           
##                   Kappa : 0.2245          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9632          
##             Specificity : 0.2096          
##          Pos Pred Value : 0.7935          
##          Neg Pred Value : 0.6436          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7313          
##    Detection Prevalence : 0.9216          
##       Balanced Accuracy : 0.5864          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the node-4 confusion matrix.
ad_tda_kde_5.60.5_n4_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.817363e-01   2.244778e-01   7.734102e-01   7.898941e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   7.595150e-08  3.106255e-258

# Keep the accuracy entry (a named length-1 vector), selected by name.
ad_tda_kde_5.60.5_n4_lr_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n4_lr_cf0$overall["Accuracy"]

# Show every per-class statistic, then keep precision, recall and F1.
ad_tda_kde_5.60.5_n4_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9631877            0.2096088            0.7934903 
##       Neg Pred Value            Precision               Recall 
##            0.6436031            0.7934903            0.9631877 
##                   F1           Prevalence       Detection Rate 
##            0.8701425            0.7592138            0.7312654 
## Detection Prevalence    Balanced Accuracy 
##            0.9215807            0.5863983
ad_tda_kde_5.60.5_n4_lr_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n4_lr_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-TDA-assisted LR vs. TDA-assisted LR classifiers

### 3-fold diff

# Fold-wise accuracy gap: `ad_lr_fit_re` minus the node-4 TDA-assisted LR
# resample accuracies (one row per CV fold).
diff_tda_kde_5.60.5_lr_n4_3_fold <-
  ad_lr_fit_re - ad_tda_kde_5.60.5_n4_lr_fit_re
diff_tda_kde_5.60.5_lr_n4_3_fold
##       Accuracy
## 1  0.032093626
## 2 -0.011383864
## 3 -0.005274049

## Bayesian tests on the 3-fold differences (bounds -0.01 / 0.01 throughout)

# Sign test
bst_tda_kde_5.60.5_lr.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_lr_n4_3_fold), -0.01, 0.01
)
bst_tda_kde_5.60.5_lr.n4_3_fold
## $probLeft
## [1] 0.25
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.25

# Sign test: ratio of the "left" probability to the "right" probability
bst_tda_kde_5.60.5_lr.n4_3_fold_odds.left <-
  bst_tda_kde_5.60.5_lr.n4_3_fold[["probLeft"]] /
  bst_tda_kde_5.60.5_lr.n4_3_fold[["probRight"]]
bst_tda_kde_5.60.5_lr.n4_3_fold_odds.left
## [1] 1

# Signed-rank test
bsr_tda_kde_5.60.5_lr.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_lr_n4_3_fold), -0.01, 0.01
)
bsr_tda_kde_5.60.5_lr.n4_3_fold
## $winLeft
## [1] 0.0747
## 
## $winRope
## [1] 0.4485
## 
## $winRight
## [1] 0.4768

# Correlated t-test (extra leading argument 0.1 before the two bounds)
bct_tda_kde_5.60.5_lr.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_lr_n4_3_fold), 0.1, -0.01, 0.01
)
bct_tda_kde_5.60.5_lr.n4_3_fold
## $left
## [1] 0.2181407
## 
## $rope
## [1] 0.388718
## 
## $right
## [1] 0.3931413

# ROPE plot over the same interval
plot(rope(diff_tda_kde_5.60.5_lr_n4_3_fold, range = c(-0.01, 0.01)))

# Bayes factor (disabled)
#bf_tda_kde_5.60.5_lr.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_lr_n4_3_fold))
#bf_tda_kde_5.60.5_lr.n4_3_fold

# One-sample t-test of the fold differences against zero
t.test(as.matrix(diff_tda_kde_5.60.5_lr_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.60.5_lr_n4_3_fold)
## t = 0.37863, df = 2, p-value = 0.7414
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.05332411  0.06361459
## sample estimates:
##   mean of x 
## 0.005145237
### Test set diff
# Test-set accuracy gap: `lr_cf_ov_acc` minus the node-4 TDA-assisted LR
# accuracy. This is a single value, not one per fold.
diff_tda_kde_5.60.5_lr.n4_test <-
  lr_cf_ov_acc - ad_tda_kde_5.60.5_n4_lr_cf0_ov_acc
diff_tda_kde_5.60.5_lr.n4_test
##  Accuracy 
## 0.0715602

## Bayesian tests on the test-set difference

# Sign test
bst_tda_kde_5.60.5_lr.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_lr.n4_test), -0.01, 0.01
)
bst_tda_kde_5.60.5_lr.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5

# Sign test: ratio of the "left" probability to the "right" probability
bst_tda_kde_5.60.5_lr.n4_test_odds.left <-
  bst_tda_kde_5.60.5_lr.n4_test[["probLeft"]] /
  bst_tda_kde_5.60.5_lr.n4_test[["probRight"]]
bst_tda_kde_5.60.5_lr.n4_test_odds.left
## [1] 0

# Signed-rank test
bsr_tda_kde_5.60.5_lr.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_lr.n4_test), -0.01, 0.01
)
bsr_tda_kde_5.60.5_lr.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1543667
## 
## $winRight
## [1] 0.8456333

# Correlated t-test; the recorded result below is NA in every region,
# presumably because a single difference leaves the variance undefined
# (TODO confirm).
bct_tda_kde_5.60.5_lr.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_lr.n4_test), 0.1, -0.01, 0.01
)
bct_tda_kde_5.60.5_lr.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_lr.n4_test))

#BayesFactor
#bf_tda_kde_5.60.5_lr.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_lr.n4_test)) #bf_tda_pca_5.60.5_lr.n4_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_lr.n4_test))

##Node5

# Fit a binomial GLM through caret on the node-5 data set, using the
# resampling scheme held in `fitControl` (defined earlier) and accuracy as the
# summary metric. The recorded warnings below report non-convergence and
# rank-deficient fits for this model.
Adult_TDA_KDE_5.60.5_n5_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n5.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the fitted caret model for node 5.
Adult_TDA_KDE_5.60.5_n5_LrFit0
## Generalized Linear Model 
## 
## 8940 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 5960, 5960, 5960 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.8709172  0.4072427

# Per-fold resampling results.
Adult_TDA_KDE_5.60.5_n5_LrFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8674497 0.4000571    Fold1
## 2 0.8734899 0.4200898    Fold2
## 3 0.8718121 0.4015813    Fold3

# Keep only the Accuracy column (still a one-column data frame) for the
# fold-wise comparison further down.
ad_tda_kde_5.60.5_n5_lr_fit_re <-
  Adult_TDA_KDE_5.60.5_n5_LrFit0$resample["Accuracy"]

# Coefficient table of the underlying GLM.
summary(Adult_TDA_KDE_5.60.5_n5_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (22 not defined because of singularities)
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                    -1.404e+13  1.235e+13  -1.137 0.255591    
## V1                              7.283e-02  5.831e-03  12.490  < 2e-16 ***
## V2..                            1.404e+13  1.235e+13   1.137 0.255591    
## V2.Federal.gov                  1.404e+13  1.235e+13   1.137 0.255591    
## V2.Local.gov                    1.404e+13  1.235e+13   1.137 0.255591    
## V2.Never.worked                 1.404e+13  1.235e+13   1.137 0.255591    
## V2.Private                      1.404e+13  1.235e+13   1.137 0.255591    
## V2.Self.emp.inc                 1.404e+13  1.235e+13   1.137 0.255591    
## V2.Self.emp.not.inc             1.404e+13  1.235e+13   1.137 0.255591    
## V2.State.gov                    1.404e+13  1.235e+13   1.137 0.255591    
## V2.Without.pay                  1.404e+13  1.235e+13   1.137 0.255591    
## V3                              1.649e-06  8.285e-07   1.990 0.046598 *  
## V4.10th                                NA         NA      NA       NA    
## V4.11th                                NA         NA      NA       NA    
## V4.12th                        -1.403e+00  1.046e+00  -1.341 0.179793    
## V4.1st.4th                             NA         NA      NA       NA    
## V4.5th.6th                             NA         NA      NA       NA    
## V4.7th.8th                             NA         NA      NA       NA    
## V4.9th                                 NA         NA      NA       NA    
## V4.Assoc.acdm                          NA         NA      NA       NA    
## V4.Assoc.voc                    2.923e-01  1.534e-01   1.906 0.056690 .  
## V4.Bachelors                           NA         NA      NA       NA    
## V4.Doctorate                           NA         NA      NA       NA    
## V4.HS.grad                     -3.118e-01  7.937e-02  -3.929 8.53e-05 ***
## V4.Masters                             NA         NA      NA       NA    
## V4.Preschool                           NA         NA      NA       NA    
## V4.Prof.school                         NA         NA      NA       NA    
## V4.Some.college                        NA         NA      NA       NA    
## V5                                     NA         NA      NA       NA    
## V6.Divorced                    -4.585e-01  7.803e-01  -0.588 0.556833    
## V6.Married.AF.spouse            2.300e+00  1.278e+00   1.800 0.071891 .  
## V6.Married.civ.spouse           2.139e+00  9.042e-01   2.366 0.017983 *  
## V6.Married.spouse.absent       -6.342e-01  1.078e+00  -0.588 0.556497    
## V6.Never.married               -7.738e-01  7.871e-01  -0.983 0.325538    
## V6.Separated                   -6.393e-01  8.631e-01  -0.741 0.458897    
## V6.Widowed                             NA         NA      NA       NA    
## V7..                                   NA         NA      NA       NA    
## V7.Adm.clerical                 3.840e-01  1.895e-01   2.026 0.042791 *  
## V7.Armed.Forces                -2.343e+01  1.285e+05   0.000 0.999855    
## V7.Craft.repair                 3.018e-01  1.525e-01   1.978 0.047873 *  
## V7.Exec.managerial              8.867e-01  1.687e-01   5.255 1.48e-07 ***
## V7.Farming.fishing             -2.974e-01  2.921e-01  -1.018 0.308570    
## V7.Handlers.cleaners           -4.953e-01  2.509e-01  -1.975 0.048315 *  
## V7.Machine.op.inspct            1.014e-01  1.823e-01   0.556 0.578245    
## V7.Other.service               -4.206e-01  2.309e-01  -1.822 0.068522 .  
## V7.Priv.house.serv             -2.354e+01  6.361e+04   0.000 0.999705    
## V7.Prof.specialty               1.081e+00  2.157e-01   5.012 5.38e-07 ***
## V7.Protective.serv              8.696e-01  2.517e-01   3.455 0.000551 ***
## V7.Sales                        4.338e-01  1.710e-01   2.537 0.011180 *  
## V7.Tech.support                 1.195e+00  2.385e-01   5.012 5.39e-07 ***
## V7.Transport.moving                    NA         NA      NA       NA    
## V8.Husband                     -1.355e+00  2.669e-01  -5.076 3.85e-07 ***
## V8.Not.in.family               -7.169e-01  5.348e-01  -1.340 0.180123    
## V8.Other.relative              -1.711e+00  5.354e-01  -3.195 0.001397 ** 
## V8.Own.child                   -1.679e+00  4.963e-01  -3.384 0.000715 ***
## V8.Unmarried                   -1.372e+00  5.799e-01  -2.365 0.018010 *  
## V8.Wife                                NA         NA      NA       NA    
## V9.Amer.Indian.Eskimo          -1.345e+00  5.176e-01  -2.598 0.009386 ** 
## V9.Asian.Pac.Islander           1.945e-01  4.062e-01   0.479 0.632063    
## V9.Black                       -5.195e-01  1.794e-01  -2.895 0.003793 ** 
## V9.Other                       -1.652e+00  8.126e-01  -2.033 0.042062 *  
## V9.White                               NA         NA      NA       NA    
## V10.Female                     -8.196e-01  2.329e-01  -3.520 0.000432 ***
## V10.Male                               NA         NA      NA       NA    
## V11                             3.571e-04  2.342e-05  15.246  < 2e-16 ***
## V12                             5.365e-04  8.044e-05   6.669 2.57e-11 ***
## V13                             3.017e-02  3.756e-03   8.033 9.51e-16 ***
## V14..                          -1.543e+00  1.401e+00  -1.101 0.270824    
## V14.Cambodia                   -8.429e-01  1.956e+00  -0.431 0.666546    
## V14.Canada                     -8.436e-01  1.503e+00  -0.561 0.574696    
## V14.China                      -1.091e+00  1.679e+00  -0.650 0.515592    
## V14.Columbia                   -2.559e+01  7.863e+04   0.000 0.999740    
## V14.Cuba                        4.458e-01  1.492e+00   0.299 0.765048    
## V14.Dominican.Republic         -1.675e+01  1.001e+03  -0.017 0.986647    
## V14.Ecuador                    -3.927e-01  1.631e+00  -0.241 0.809662    
## V14.El.Salvador                -2.857e+00  1.753e+00  -1.629 0.103224    
## V14.England                    -7.067e-01  1.627e+00  -0.434 0.664082    
## V14.France                     -2.323e+01  1.530e+05   0.000 0.999879    
## V14.Germany                    -1.102e+00  1.462e+00  -0.754 0.451064    
## V14.Greece                     -3.384e+00  1.891e+00  -1.789 0.073559 .  
## V14.Guatemala                  -1.475e+00  1.827e+00  -0.808 0.419263    
## V14.Haiti                      -2.485e+01  8.912e+04   0.000 0.999778    
## V14.Holand.Netherlands                 NA         NA      NA       NA    
## V14.Honduras                   -2.423e+01  1.893e+05   0.000 0.999898    
## V14.Hong                       -2.640e+01  1.333e+05   0.000 0.999842    
## V14.Hungary                    -2.693e+01  2.282e+05   0.000 0.999906    
## V14.India                      -2.796e+00  2.010e+00  -1.391 0.164229    
## V14.Iran                        6.867e-01  1.625e+00   0.423 0.672495    
## V14.Ireland                    -1.407e+00  1.763e+00  -0.798 0.424838    
## V14.Italy                      -2.642e+00  1.853e+00  -1.426 0.153821    
## V14.Jamaica                    -3.224e-01  1.534e+00  -0.210 0.833549    
## V14.Japan                      -2.593e+01  8.401e+04   0.000 0.999754    
## V14.Laos                       -2.642e+01  1.500e+05   0.000 0.999859    
## V14.Mexico                     -1.844e+00  1.454e+00  -1.268 0.204749    
## V14.Nicaragua                  -2.524e+01  9.654e+04   0.000 0.999791    
## V14.Outlying.US.Guam.USVI.etc. -2.408e+01  1.214e+05   0.000 0.999842    
## V14.Peru                       -2.565e+01  1.102e+05   0.000 0.999814    
## V14.Philippines                -8.706e-01  1.538e+00  -0.566 0.571416    
## V14.Poland                     -2.328e+00  1.750e+00  -1.330 0.183443    
## V14.Portugal                   -8.527e-01  1.652e+00  -0.516 0.605716    
## V14.Puerto.Rico                -2.147e+00  1.591e+00  -1.349 0.177356    
## V14.Scotland                   -2.647e+01  1.802e+05   0.000 0.999883    
## V14.South                      -1.774e+00  1.684e+00  -1.053 0.292268    
## V14.Taiwan                     -2.328e+00  1.917e+00  -1.215 0.224526    
## V14.Thailand                   -1.935e+00  2.031e+00  -0.953 0.340831    
## V14.Trinadad.Tobago            -2.633e+01  1.455e+05   0.000 0.999856    
## V14.United.States              -1.167e+00  1.359e+00  -0.859 0.390520    
## V14.Vietnam                    -1.930e+00  1.809e+00  -1.067 0.286045    
## V14.Yugoslavia                         NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 7692.0  on 8939  degrees of freedom
## Residual deviance: 4776.9  on 8853  degrees of freedom
## AIC: 4950.9
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-5 fit, showing up to 50 features.
# The title typo "Assited" is corrected to "Assisted".
vip(Adult_TDA_KDE_5.60.5_n5_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.60.5_n5_Lr1Fit TDA-Assisted LR")

# Score the held-out test set with the node-5 model.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n5_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases

# Cross-tabulate predictions against the observed test labels.
ad_tda_kde_5.60.5_n5_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n5_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6568   911
##      >50K     848  1441
##                                           
##                Accuracy : 0.8199          
##                  95% CI : (0.8122, 0.8275)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.5029          
##                                           
##  Mcnemar's Test P-Value : 0.1393          
##                                           
##             Sensitivity : 0.8857          
##             Specificity : 0.6127          
##          Pos Pred Value : 0.8782          
##          Neg Pred Value : 0.6295          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6724          
##    Detection Prevalence : 0.7657          
##       Balanced Accuracy : 0.7492          
##                                           
##        'Positive' Class :  <=50K          
## 
# Prints the node-5 confusion matrix a second time; the recorded output that
# follows is identical to the print directly above.
ad_tda_kde_5.60.5_n5_lr_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6568   911
##      >50K     848  1441
##                                           
##                Accuracy : 0.8199          
##                  95% CI : (0.8122, 0.8275)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.5029          
##                                           
##  Mcnemar's Test P-Value : 0.1393          
##                                           
##             Sensitivity : 0.8857          
##             Specificity : 0.6127          
##          Pos Pred Value : 0.8782          
##          Neg Pred Value : 0.6295          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6724          
##    Detection Prevalence : 0.7657          
##       Balanced Accuracy : 0.7492          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the node-5 confusion matrix.
ad_tda_kde_5.60.5_n5_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.199222e-01   5.029221e-01   8.121559e-01   8.274973e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   7.911664e-48   1.393310e-01

# Keep the accuracy entry (a named length-1 vector), selected by name.
ad_tda_kde_5.60.5_n5_lr_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n5_lr_cf0$overall["Accuracy"]

# Show every per-class statistic, then keep precision, recall and F1.
ad_tda_kde_5.60.5_n5_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.8856526            0.6126701            0.8781923 
##       Neg Pred Value            Precision               Recall 
##            0.6295325            0.8781923            0.8856526 
##                   F1           Prevalence       Detection Rate 
##            0.8819067            0.7592138            0.6723997 
## Detection Prevalence    Balanced Accuracy 
##            0.7656634            0.7491614
ad_tda_kde_5.60.5_n5_lr_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n5_lr_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-TDA-assisted LR vs. TDA-assisted LR classifiers

### 3-fold diff

# Fold-wise accuracy gap: `ad_lr_fit_re` minus the node-5 TDA-assisted LR
# resample accuracies (one row per CV fold).
diff_tda_kde_5.60.5_lr_n5_3_fold <-
  ad_lr_fit_re - ad_tda_kde_5.60.5_n5_lr_fit_re
diff_tda_kde_5.60.5_lr_n5_3_fold
##      Accuracy
## 1 -0.01775238
## 2 -0.02526037
## 3 -0.01645541

## Bayesian tests on the 3-fold differences (bounds -0.01 / 0.01 throughout)

# Sign test
bst_tda_kde_5.60.5_lr.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_lr_n5_3_fold), -0.01, 0.01
)
bst_tda_kde_5.60.5_lr.n5_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0

# Sign test: ratio of the "left" probability to the "right" probability.
# probRight is 0 here, so the recorded ratio is Inf.
bst_tda_kde_5.60.5_lr.n5_3_fold_odds.left <-
  bst_tda_kde_5.60.5_lr.n5_3_fold[["probLeft"]] /
  bst_tda_kde_5.60.5_lr.n5_3_fold[["probRight"]]
bst_tda_kde_5.60.5_lr.n5_3_fold_odds.left
## [1] Inf

# Signed-rank test
bsr_tda_kde_5.60.5_lr.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_lr_n5_3_fold), -0.01, 0.01
)
bsr_tda_kde_5.60.5_lr.n5_3_fold
## $winLeft
## [1] 0.9106
## 
## $winRope
## [1] 0.0894
## 
## $winRight
## [1] 0

# Correlated t-test (extra leading argument 0.1 before the two bounds)
bct_tda_kde_5.60.5_lr.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_lr_n5_3_fold), 0.1, -0.01, 0.01
)
bct_tda_kde_5.60.5_lr.n5_3_fold
## $left
## [1] 0.9548888
## 
## $rope
## [1] 0.03955913
## 
## $right
## [1] 0.005552066

# ROPE plot over the same interval
plot(rope(diff_tda_kde_5.60.5_lr_n5_3_fold, range = c(-0.01, 0.01)))

# Bayes factor (disabled)
#bf_tda_kde_5.60.5_lr.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_lr_n5_3_fold))
#bf_tda_kde_5.60.5_lr.n5_3_fold

# One-sample t-test of the fold differences against zero
t.test(as.matrix(diff_tda_kde_5.60.5_lr_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.60.5_lr_n5_3_fold)
## t = -7.2227, df = 2, p-value = 0.01863
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.031631277 -0.008014158
## sample estimates:
##   mean of x 
## -0.01982272
### Test set diff
# Single test-set accuracy difference: lr_cf_ov_acc minus the TDA/KDE node-5 LR
# accuracy. A positive value means lr_cf_ov_acc is the larger one.
diff_tda_kde_5.60.5_lr.n5_test <- lr_cf_ov_acc - ad_tda_kde_5.60.5_n5_lr_cf0_ov_acc
diff_tda_kde_5.60.5_lr.n5_test
##   Accuracy 
## 0.03337428
## Bayesian Tests Test set diff

# Bayesian sign test on the single test-set difference (one observation only),
# called with the same -0.01 / 0.01 bounds as the 3-fold version.
bst_tda_kde_5.60.5_lr.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_lr.n5_test),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_lr.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" against "right" for the test-set sign test (0 in the recorded
# run, because probLeft is 0).
bst_tda_kde_5.60.5_lr.n5_test_odds.left <-
  bst_tda_kde_5.60.5_lr.n5_test[["probLeft"]] /
  bst_tda_kde_5.60.5_lr.n5_test[["probRight"]]
bst_tda_kde_5.60.5_lr.n5_test_odds.left
## [1] 0
# Bayesian signed-rank test on the single test-set difference.
bsr_tda_kde_5.60.5_lr.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_lr.n5_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_lr.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1586
## 
## $winRight
## [1] 0.8414
# Correlated Bayesian t-test on the single test-set difference.
# NOTE(review): the recorded result is NA for left/rope/right - presumably
# because one observation gives no variance estimate; confirm before reporting.
bct_tda_kde_5.60.5_lr.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_lr.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_lr.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_lr.n5_test))

#BayesFactor
#bf_tda_kde_5.60.5_lr.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_lr.n5_test)) #bf_tda_kde_5.60.5_lr.n5_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_lr.n5_test))


# Naive Bayes baseline: fit method "nb" on the full one-hot training set,
# resampled with the shared `fitControl` and tuned on accuracy.
# In the recorded run the usekernel = FALSE candidate failed on every fold
# (zero-variance predictors), so only usekernel = TRUE has resampled results.
adultNbFit <- train(
  as.factor(adult_df1) ~ .,
  data = adult.one_hot_df4Train,
  method = "nb",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Haiti, V14.Holand.Netherlands, V14.Hungary, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V14.Guatemala, V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V14.Dominican.Republic, V14.Ecuador, V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
# Print the fitted model summary (resampled accuracy/kappa per tuning value).
adultNbFit
## Naive Bayes 
## 
## 22793 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 15195, 15196, 15195 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa     
##   FALSE            NaN         NaN
##    TRUE      0.7648843  0.03503428
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
# Per-fold accuracy and kappa of the selected naive Bayes model.
adultNbFit[["resample"]]
##    Accuracy      Kappa Resample
## 1 0.7591471 0.00000000    Fold1
## 2 0.7735948 0.08778280    Fold2
## 3 0.7619110 0.01732004    Fold3
# Keep only the per-fold Accuracy column (still a one-column data frame) for
# the fold-wise comparisons against the TDA node models.
ad_nb_fit_re <- adultNbFit[["resample"]]["Accuracy"]

# Structure overview of the fitted object.
summary(adultNbFit)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
#varImp (adultNbFit)



# Score the held-out test set with the baseline naive Bayes model.
predictions <- predict(
  adultNbFit,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate predicted vs. observed test labels and print the summary.
nb_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
nb_cf
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2219
##      >50K       0   133
##                                           
##                Accuracy : 0.7728          
##                  95% CI : (0.7644, 0.7811)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.0008047       
##                                           
##                   Kappa : 0.0834          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 1.00000         
##             Specificity : 0.05655         
##          Pos Pred Value : 0.76969         
##          Neg Pred Value : 1.00000         
##              Prevalence : 0.75921         
##          Detection Rate : 0.75921         
##    Detection Prevalence : 0.98638         
##       Balanced Accuracy : 0.52827         
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics (accuracy, kappa, CI bounds, p-values).
nb_cf[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##    0.772829648    0.083417992    0.764388511    0.781107809    0.759213759 
## AccuracyPValue  McnemarPValue 
##    0.000804745    0.000000000
# Keep the baseline test-set accuracy (first entry of `overall`) for the
# test-set comparisons, then print the per-class metrics.
nb_cf_ov_acc <- nb_cf[["overall"]]["Accuracy"]
nb_cf[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##           1.00000000           0.05654762           0.76969382 
##       Neg Pred Value            Precision               Recall 
##           1.00000000           0.76969382           1.00000000 
##                   F1           Prevalence       Detection Rate 
##           0.86986101           0.75921376           0.75921376 
## Detection Prevalence    Balanced Accuracy 
##           0.98638411           0.52827381
# Keep precision, recall and F1 (positions 5:7 of `byClass`), selected by name.
nb_cf_pre_rec_f1 <- nb_cf[["byClass"]][c("Precision", "Recall", "F1")]


##With TDA PCA filter 5 intervals, 60% overlap (per the 5.60.5 object names; header previously said 50% - TODO confirm), 5 bins
##Node1

# Naive Bayes fitted on the Node 1 subset only (tda.m_adult_5.60.5.n1.vec),
# with the same resampling control and metric as the baseline model.
Adult_TDA_PC_5.60.5_n1_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n1.vec,
  method = "nb",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.Preschool, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Married.spouse.absent, V6.Never.married, V6.Separated, V6.Widowed, V7.Armed.Forces, V7.Handlers.cleaners, V7.Priv.house.serv, V8.Husband, V8.Not.in.family, V8.Other.relative, V8.Own.child, V8.Unmarried, V8.Wife, V9.Amer.Indian.Eskimo, V9.Other, V10.Female, V10.Male, V14.Cambodia, V14.Columbia, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Ireland, V14.Italy, V14.Jamaica, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.11th, V4.1st.4th, V4.5th.6th, V4.Preschool, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Married.spouse.absent, V6.Never.married, V6.Separated, V6.Widowed, V7.Armed.Forces, V7.Handlers.cleaners, V7.Priv.house.serv, V8.Husband, V8.Not.in.family, V8.Other.relative, V8.Own.child, V8.Unmarried, V8.Wife, V9.Amer.Indian.Eskimo, V10.Female, V10.Male, V14.Cambodia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Ireland, V14.Italy, V14.Jamaica, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.11th, V4.1st.4th, V4.5th.6th, V4.Preschool, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Married.spouse.absent, V6.Never.married, V6.Separated, V6.Widowed, V7.Armed.Forces, V7.Handlers.cleaners, V7.Priv.house.serv, V8.Husband, V8.Not.in.family, V8.Other.relative, V8.Own.child, V8.Unmarried, V8.Wife, V9.Amer.Indian.Eskimo, V9.Black, V10.Female, V10.Male, V14.Cambodia, V14.China, V14.Columbia, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.Germany, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Ireland, V14.Italy, V14.Jamaica, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
# Print the Node 1 model summary (resampled accuracy/kappa per tuning value).
Adult_TDA_PC_5.60.5_n1_NbFit0
## Naive Bayes 
## 
## 6560 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 4374, 4373, 4373 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa
##   FALSE            NaN  NaN  
##    TRUE      0.8948171    0  
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
# Per-fold accuracy and kappa of the Node 1 model.
Adult_TDA_PC_5.60.5_n1_NbFit0[["resample"]]
##    Accuracy Kappa Resample
## 1 0.8947850     0    Fold1
## 2 0.8948331     0    Fold2
## 3 0.8948331     0    Fold3
# Keep only the per-fold Accuracy column (one-column data frame) of the Node 1
# model for the fold-wise comparison against the baseline.
ad_tda_pc_5.60.5_n1_nb_fit_re <- Adult_TDA_PC_5.60.5_n1_NbFit0[["resample"]]["Accuracy"]

# Structure overview of the fitted object.
summary(Adult_TDA_PC_5.60.5_n1_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Score the full held-out test set with the Node 1 model.
pred0 <- predict(
  Adult_TDA_PC_5.60.5_n1_NbFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate predicted vs. observed test labels and print the summary.
# NOTE(review): in the recorded run every test row is predicted ' >50K'.
ad_tda_pc_5.60.5_n1_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n1_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K      0     0
##      >50K    7416  2352
##                                           
##                Accuracy : 0.2408          
##                  95% CI : (0.2323, 0.2494)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.0000          
##             Specificity : 1.0000          
##          Pos Pred Value :    NaN          
##          Neg Pred Value : 0.2408          
##              Prevalence : 0.7592          
##          Detection Rate : 0.0000          
##    Detection Prevalence : 0.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Second print of the same Node 1 confusion matrix (repeats the output above).
ad_tda_pc_5.60.5_n1_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K      0     0
##      >50K    7416  2352
##                                           
##                Accuracy : 0.2408          
##                  95% CI : (0.2323, 0.2494)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.0000          
##             Specificity : 1.0000          
##          Pos Pred Value :    NaN          
##          Neg Pred Value : 0.2408          
##              Prevalence : 0.7592          
##          Detection Rate : 0.0000          
##    Detection Prevalence : 0.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics of the Node 1 model.
ad_tda_pc_5.60.5_n1_nb_cf0[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.2407862      0.0000000      0.2323343      0.2493929      0.7592138 
## AccuracyPValue  McnemarPValue 
##      1.0000000      0.0000000
# Keep the Node 1 test-set accuracy (first entry of `overall`) for the
# test-set comparison against the baseline.
ad_tda_pc_5.60.5_n1_nb_cf0_ov_acc <- ad_tda_pc_5.60.5_n1_nb_cf0$overall[1]
# Print the per-class metrics. The element is named `byClass`; the earlier
# spelling `byClas1` matched nothing and therefore printed NULL.
ad_tda_pc_5.60.5_n1_nb_cf0$byClass
## (output not re-recorded after the typo fix; the previous run printed NULL here)
# Keep precision, recall and F1 (positions 5:7 of `byClass`).
# NOTE(review): the recorded confusion matrix reports Pos Pred Value = NaN for
# this model, so expect NaN among these values.
ad_tda_pc_5.60.5_n1_nb_cf0_pre_rec_f1 <- ad_tda_pc_5.60.5_n1_nb_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers

### 3-fold diff

# Fold-wise accuracy difference: baseline NB minus Node 1 NB.
# Negative entries mean the Node 1 model scored higher on that fold.
# NOTE(review): the two models were resampled on different data sets (22793 vs.
# 6560 rows in the recorded runs), so fold k pairs unrelated folds.
diff_tda_pca_5.60.5_nb_n1_3_fold <- ad_nb_fit_re - ad_tda_pc_5.60.5_n1_nb_fit_re
diff_tda_pca_5.60.5_nb_n1_3_fold
##     Accuracy
## 1 -0.1356379
## 2 -0.1212383
## 3 -0.1329221
## Bayesian Tests 3-fold diff

# Bayesian sign test on the fold-wise differences. -0.01 and 0.01 are passed as
# the 2nd/3rd arguments (presumably the ROPE bounds, given the probRope output).
bst_tda_pca_5.60.5_nb.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_nb_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_nb.n1_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds of "left" against "right" from the sign test. There is no guard for
# probRight == 0; the recorded run evaluates to Inf here.
bst_tda_pca_5.60.5_nb.n1_3_fold_odds.left <-
  bst_tda_pca_5.60.5_nb.n1_3_fold[["probLeft"]] /
  bst_tda_pca_5.60.5_nb.n1_3_fold[["probRight"]]
bst_tda_pca_5.60.5_nb.n1_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the same fold-wise differences.
bsr_tda_pca_5.60.5_nb.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nb_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_nb.n1_3_fold
## $winLeft
## [1] 0.9909667
## 
## $winRope
## [1] 0.009033333
## 
## $winRight
## [1] 0
# Correlated Bayesian t-test on the fold-wise differences.
# 0.1 is the second argument (presumably the between-fold correlation - TODO
# confirm against the function definition); -0.01 / 0.01 follow as the bounds.
bct_tda_pca_5.60.5_nb.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_nb_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_nb.n1_3_fold
## $left
## [1] 0.9990981
## 
## $rope
## [1] 0.0002389229
## 
## $right
## [1] 0.0006630265
# ROPE plot of the fold-wise differences, with the range c(-0.01, 0.01).
plot(
  rope(
    diff_tda_pca_5.60.5_nb_n1_3_fold,
    c(-0.01, 0.01)
  )
)

# Bayes factor t-test (left disabled)
#bf_tda_pca_5.60.5_nb.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nb_n1_3_fold))
#bf_tda_pca_5.60.5_nb.n1_3_fold

# One-sample t-test of the fold-wise differences (default null: mean equal to 0).
t.test(x = as.matrix(diff_tda_pca_5.60.5_nb_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.60.5_nb_n1_3_fold)
## t = -29.414, df = 2, p-value = 0.001154
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1489391 -0.1109264
## sample estimates:
##  mean of x 
## -0.1299327
### Test set diff
# Single test-set accuracy difference: baseline NB minus Node 1 NB.
# A positive value means the baseline scored higher on the test set.
diff_tda_pca_5.60.5_nb.n1_test <- nb_cf_ov_acc - ad_tda_pc_5.60.5_n1_nb_cf0_ov_acc
diff_tda_pca_5.60.5_nb.n1_test
##  Accuracy 
## 0.5320434
## Bayesian Tests Test set diff

# Bayesian sign test on the single test-set difference (one observation only).
bst_tda_pca_5.60.5_nb.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_nb.n1_test),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_nb.n1_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" against "right" for the test-set sign test (0 in the recorded
# run, because probLeft is 0).
bst_tda_pca_5.60.5_nb.n1_test_odds.left <-
  bst_tda_pca_5.60.5_nb.n1_test[["probLeft"]] /
  bst_tda_pca_5.60.5_nb.n1_test[["probRight"]]
bst_tda_pca_5.60.5_nb.n1_test_odds.left
## [1] 0
# Bayesian signed-rank test on the single test-set difference.
bsr_tda_pca_5.60.5_nb.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nb.n1_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_nb.n1_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1584333
## 
## $winRight
## [1] 0.8415667
# Correlated Bayesian t-test on the single test-set difference.
# NOTE(review): the recorded result is NA for left/rope/right - presumably
# because one observation gives no variance estimate; confirm before reporting.
bct_tda_pca_5.60.5_nb.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_nb.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_nb.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_nb.n1_test))

#BayesFactor
#bf_tda_pca_5.60.5_nb.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nb.n1_test)) #bf_tda_pca_5.60.5_nb.n1_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_nb.n1_test))

##Node2

# Naive Bayes fitted on the Node 2 subset only (tda.m_adult_5.60.5.n2.vec),
# with the same resampling control and metric as the baseline model.
Adult_TDA_PC_5.60.5_n2_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n2.vec,
  method = "nb",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Vietnam
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Thailand
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc., V14.Scotland
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
# Print the Node 2 model summary (resampled accuracy/kappa per tuning value).
Adult_TDA_PC_5.60.5_n2_NbFit0
## Naive Bayes 
## 
## 13933 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 9288, 9289, 9289 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa     
##   FALSE            NaN         NaN
##    TRUE      0.5162561  0.04903134
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
# Per-fold accuracy and kappa of the Node 2 model.
Adult_TDA_PC_5.60.5_n2_NbFit0[["resample"]]
##    Accuracy      Kappa Resample
## 1 0.5194833 0.05526457    Fold1
## 2 0.5217485 0.05982785    Fold2
## 3 0.5075366 0.03200161    Fold3
# Keep only the per-fold Accuracy column (one-column data frame) of the Node 2
# model for the fold-wise comparison against the baseline.
ad_tda_pc_5.60.5_n2_nb_fit_re <- Adult_TDA_PC_5.60.5_n2_NbFit0[["resample"]]["Accuracy"]

# Structure overview of the fitted object.
summary(Adult_TDA_PC_5.60.5_n2_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Score the full held-out test set with the Node 2 model.
pred0 <- predict(
  Adult_TDA_PC_5.60.5_n2_NbFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate predicted vs. observed test labels and print the summary.
ad_tda_pc_5.60.5_n2_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n2_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7409  2304
##      >50K       7    48
##                                           
##                Accuracy : 0.7634          
##                  95% CI : (0.7549, 0.7718)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.169           
##                                           
##                   Kappa : 0.0292          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.99906         
##             Specificity : 0.02041         
##          Pos Pred Value : 0.76279         
##          Neg Pred Value : 0.87273         
##              Prevalence : 0.75921         
##          Detection Rate : 0.75850         
##    Detection Prevalence : 0.99437         
##       Balanced Accuracy : 0.50973         
##                                           
##        'Positive' Class :  <=50K          
## 
# Second print of the same Node 2 confusion matrix (repeats the output above).
ad_tda_pc_5.60.5_n2_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7409  2304
##      >50K       7    48
##                                           
##                Accuracy : 0.7634          
##                  95% CI : (0.7549, 0.7718)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.169           
##                                           
##                   Kappa : 0.0292          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.99906         
##             Specificity : 0.02041         
##          Pos Pred Value : 0.76279         
##          Neg Pred Value : 0.87273         
##              Prevalence : 0.75921         
##          Detection Rate : 0.75850         
##    Detection Prevalence : 0.99437         
##       Balanced Accuracy : 0.50973         
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics of the Node 2 model.
ad_tda_pc_5.60.5_n2_nb_cf0[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##     0.76341114     0.02920106     0.75485419     0.77181076     0.75921376 
## AccuracyPValue  McnemarPValue 
##     0.16896721     0.00000000
# Keep the Node 2 test-set accuracy (first entry of `overall`) for the
# test-set comparison, then print the per-class metrics.
ad_tda_pc_5.60.5_n2_nb_cf0_ov_acc <- ad_tda_pc_5.60.5_n2_nb_cf0[["overall"]]["Accuracy"]
ad_tda_pc_5.60.5_n2_nb_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##           0.99905609           0.02040816           0.76279213 
##       Neg Pred Value            Precision               Recall 
##           0.87272727           0.76279213           0.99905609 
##                   F1           Prevalence       Detection Rate 
##           0.86508261           0.75921376           0.75849713 
## Detection Prevalence    Balanced Accuracy 
##           0.99436937           0.50973213
# Keep precision, recall and F1 (positions 5:7 of `byClass`), selected by name.
ad_tda_pc_5.60.5_n2_nb_cf0_pre_rec_f1 <- ad_tda_pc_5.60.5_n2_nb_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers

### 3-fold diff

# Fold-wise accuracy difference: baseline NB minus Node 2 NB.
# Positive entries mean the baseline scored higher on that fold.
# NOTE(review): the two models were resampled on different data sets (22793 vs.
# 13933 rows in the recorded runs), so fold k pairs unrelated folds.
diff_tda_pca_5.60.5_nb_n2_3_fold <- ad_nb_fit_re - ad_tda_pc_5.60.5_n2_nb_fit_re
diff_tda_pca_5.60.5_nb_n2_3_fold
##    Accuracy
## 1 0.2396638
## 2 0.2518463
## 3 0.2543744
## Bayesian Tests 3-fold diff

# Bayesian sign test on the fold-wise differences. -0.01 and 0.01 are passed as
# the 2nd/3rd arguments (presumably the ROPE bounds, given the probRope output).
bst_tda_pca_5.60.5_nb.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_nb_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_nb.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds of "left" against "right" from the sign test (0 in the recorded run,
# because probLeft is 0).
bst_tda_pca_5.60.5_nb.n2_3_fold_odds.left <-
  bst_tda_pca_5.60.5_nb.n2_3_fold[["probLeft"]] /
  bst_tda_pca_5.60.5_nb.n2_3_fold[["probRight"]]
bst_tda_pca_5.60.5_nb.n2_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the same fold-wise differences.
bsr_tda_pca_5.60.5_nb.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nb_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_nb.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.009733333
## 
## $winRight
## [1] 0.9902667
# Correlated Bayesian t-test on the fold-wise differences.
# 0.1 is the second argument (presumably the between-fold correlation - TODO
# confirm against the function definition); -0.01 / 0.01 follow as the bounds.
bct_tda_pca_5.60.5_nb.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_nb_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_nb.n2_3_fold
## $left
## [1] 0.0002054151
## 
## $rope
## [1] 3.584964e-05
## 
## $right
## [1] 0.9997587
# ROPE plot of the fold-wise differences, with the range c(-0.01, 0.01).
plot(
  rope(
    diff_tda_pca_5.60.5_nb_n2_3_fold,
    c(-0.01, 0.01)
  )
)

# Bayes factor t-test (left disabled)
#bf_tda_pca_5.60.5_nb.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nb_n2_3_fold))
#bf_tda_pca_5.60.5_nb.n2_3_fold

# One-sample t-test of the fold-wise differences (default null: mean equal to 0).
t.test(x = as.matrix(diff_tda_pca_5.60.5_nb_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.60.5_nb_n2_3_fold)
## t = 54.749, df = 2, p-value = 0.0003334
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.2290890 0.2681674
## sample estimates:
## mean of x 
## 0.2486282
### Test set diff
# Single test-set accuracy difference: baseline NB minus Node 2 NB.
# A positive value means the baseline scored higher on the test set.
diff_tda_pca_5.60.5_nb.n2_test <- nb_cf_ov_acc - ad_tda_pc_5.60.5_n2_nb_cf0_ov_acc
diff_tda_pca_5.60.5_nb.n2_test
##    Accuracy 
## 0.009418509
## Bayesian Tests Test set diff

# Bayesian sign test on the single test-set difference (one observation only).
bst_tda_pca_5.60.5_nb.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_nb.n2_test),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_nb.n2_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds of "left" against "right" for the test-set sign test. Both probabilities
# are 0 in the recorded run, so the ratio is 0 / 0 = NaN (no guard is applied).
bst_tda_pca_5.60.5_nb.n2_test_odds.left <-
  bst_tda_pca_5.60.5_nb.n2_test[["probLeft"]] /
  bst_tda_pca_5.60.5_nb.n2_test[["probRight"]]
bst_tda_pca_5.60.5_nb.n2_test_odds.left
## [1] NaN
# Bayesian signed-rank test on the single test-set difference.
bsr_tda_pca_5.60.5_nb.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nb.n2_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_nb.n2_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Correlated Bayesian t-test on the single test-set difference.
# NOTE(review): the recorded result is NA for left/rope/right - presumably
# because one observation gives no variance estimate; confirm before reporting.
bct_tda_pca_5.60.5_nb.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_nb.n2_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_nb.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_nb.n2_test))

#BayesFactor
#bf_tda_pca_5.60.5_nb.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nb.n2_test)) #bf_tda_pca_5.60.5_nb.n2_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_nb.n2_test))

## Node 3

# Naive Bayes fit on the node-3 TDA subset, resampled through fitControl.
# The usekernel = FALSE candidate fails in every fold (zero-variance dummy
# columns within a class), so only usekernel = TRUE yields resampled metrics.
Adult_TDA_PC_5.60.5_n3_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n3.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Columbia, V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Thailand
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Ecuador, V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
# Print the fitted node-3 model (resampling results per tuning candidate)
Adult_TDA_PC_5.60.5_n3_NbFit0
## Naive Bayes 
## 
## 15744 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 10496, 10497, 10495 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa      
##   FALSE            NaN          NaN
##    TRUE      0.7447282  0.003313477
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
# Per-fold Accuracy and Kappa of the selected node-3 model
Adult_TDA_PC_5.60.5_n3_NbFit0$resample
##    Accuracy       Kappa Resample
## 1 0.7458079 0.009940431    Fold1
## 2 0.7442348 0.000000000    Fold2
## 3 0.7441417 0.000000000    Fold3
# Keep the per-fold accuracies (one-column data frame) for the fold-wise
# comparison; the column is selected by name so a change in column order
# cannot silently pick a different metric.
ad_tda_pc_5.60.5_n3_nb_fit_re <- Adult_TDA_PC_5.60.5_n3_NbFit0$resample["Accuracy"]

# Structure of the final fitted model object
summary(Adult_TDA_PC_5.60.5_n3_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Score the test data with the node-3 Naive Bayes model
pred0 <- predict(
  Adult_TDA_PC_5.60.5_n3_NbFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of those predictions against the observed test labels
ad_tda_pc_5.60.5_n3_nb_cf0 <- confusionMatrix(
  data = pred0,
  as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n3_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2205
##      >50K       0   147
##                                           
##                Accuracy : 0.7743          
##                  95% CI : (0.7658, 0.7825)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.0002414       
##                                           
##                   Kappa : 0.0919          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0625          
##          Pos Pred Value : 0.7708          
##          Neg Pred Value : 1.0000          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 0.9850          
##       Balanced Accuracy : 0.5312          
##                                           
##        'Positive' Class :  <=50K          
## 
# Second print of the same node-3 confusion matrix (duplicates the one above)
ad_tda_pc_5.60.5_n3_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2205
##      >50K       0   147
##                                           
##                Accuracy : 0.7743          
##                  95% CI : (0.7658, 0.7825)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.0002414       
##                                           
##                   Kappa : 0.0919          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0625          
##          Pos Pred Value : 0.7708          
##          Neg Pred Value : 1.0000          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 0.9850          
##       Balanced Accuracy : 0.5312          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the node-3 confusion matrix (named numeric vector)
ad_tda_pc_5.60.5_n3_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   0.7742628993   0.0919232485   0.7658399046   0.7825220598   0.7592137592 
## AccuracyPValue  McnemarPValue 
##   0.0002413576   0.0000000000
# Overall test-set accuracy, selected by name instead of by position
ad_tda_pc_5.60.5_n3_nb_cf0_ov_acc <- ad_tda_pc_5.60.5_n3_nb_cf0$overall["Accuracy"]
# Per-class statistics of the node-3 confusion matrix
ad_tda_pc_5.60.5_n3_nb_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            1.0000000            0.0625000            0.7708138 
##       Neg Pred Value            Precision               Recall 
##            1.0000000            0.7708138            1.0000000 
##                   F1           Prevalence       Detection Rate 
##            0.8705758            0.7592138            0.7592138 
## Detection Prevalence    Balanced Accuracy 
##            0.9849509            0.5312500
# Precision, recall and F1, selected by name instead of positions 5:7
ad_tda_pc_5.60.5_n3_nb_cf0_pre_rec_f1 <- ad_tda_pc_5.60.5_n3_nb_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-TDA-assisted NB vs. TDA-assisted NB classifiers

### 3-fold diff

# Fold-wise accuracy: baseline NB minus node-3 TDA-assisted NB
diff_tda_pca_5.60.5_nb_n3_3_fold <- ad_nb_fit_re - ad_tda_pc_5.60.5_n3_nb_fit_re
diff_tda_pca_5.60.5_nb_n3_3_fold
##     Accuracy
## 1 0.01333922
## 2 0.02936004
## 3 0.01776929
## Bayesian tests on the 3-fold differences

# Bayesian Sign Test with bounds -0.01 / 0.01
bst_tda_pca_5.60.5_nb.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_nb_n3_3_fold),
  -0.01, 0.01
)
bst_tda_pca_5.60.5_nb.n3_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds of "left" vs. "right" from the Bayesian Sign Test.
# The ratio is NaN when both probabilities are 0 and Inf when only probRight is 0.
bst_tda_pca_5.60.5_nb.n3_3_fold_odds.left <-
  bst_tda_pca_5.60.5_nb.n3_3_fold[["probLeft"]] /
  bst_tda_pca_5.60.5_nb.n3_3_fold[["probRight"]]
bst_tda_pca_5.60.5_nb.n3_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test on the node-3 3-fold differences
bsr_tda_pca_5.60.5_nb.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nb_n3_3_fold),
  -0.01, 0.01
)
bsr_tda_pca_5.60.5_nb.n3_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.09133333
## 
## $winRight
## [1] 0.9086667
# Bayesian Correlated Test on the node-3 3-fold differences
bct_tda_pca_5.60.5_nb.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_nb_n3_3_fold),
  0.1, -0.01, 0.01
)
bct_tda_pca_5.60.5_nb.n3_3_fold
## $left
## [1] 0.0159292
## 
## $rope
## [1] 0.08751828
## 
## $right
## [1] 0.8965525
# ROPE plot of the node-3 3-fold differences over [-0.01, 0.01]
plot(
  rope(diff_tda_pca_5.60.5_nb_n3_3_fold, c(-0.01, 0.01))
)

# Bayes factor (currently disabled)
#bf_tda_pca_5.60.5_nb.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nb_n3_3_fold))
#bf_tda_pca_5.60.5_nb.n3_3_fold

# One-sample t-test of the fold differences (default null: mean equal to 0)
t.test(as.matrix(diff_tda_pca_5.60.5_nb_n3_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.60.5_nb_n3_3_fold)
## t = 4.22, df = 2, p-value = 0.05183
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.0003946547  0.0407070175
## sample estimates:
##  mean of x 
## 0.02015618
### Test set diff
# Baseline NB test accuracy minus node-3 TDA-assisted NB test accuracy
diff_tda_pca_5.60.5_nb.n3_test <- nb_cf_ov_acc - ad_tda_pc_5.60.5_n3_nb_cf0_ov_acc
diff_tda_pca_5.60.5_nb.n3_test
##     Accuracy 
## -0.001433251
## Bayesian tests on the test-set difference

# Bayesian Sign Test with bounds -0.01 / 0.01
bst_tda_pca_5.60.5_nb.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_nb.n3_test),
  -0.01, 0.01
)
bst_tda_pca_5.60.5_nb.n3_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds of "left" vs. "right" from the Bayesian Sign Test.
# The ratio is NaN when both probabilities are 0 and Inf when only probRight is 0.
bst_tda_pca_5.60.5_nb.n3_test_odds.left <-
  bst_tda_pca_5.60.5_nb.n3_test[["probLeft"]] /
  bst_tda_pca_5.60.5_nb.n3_test[["probRight"]]
bst_tda_pca_5.60.5_nb.n3_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test on the node-3 test-set difference.
# Uses the node-3 difference and stores the result under the node-3 name, so
# the node-2 result (bsr_tda_pca_5.60.5_nb.n2_test) is not recomputed or
# overwritten in this section.
bsr_tda_pca_5.60.5_nb.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nb.n3_test),
  -0.01, 0.01
)
bsr_tda_pca_5.60.5_nb.n3_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian Correlated Test on the node-3 test-set difference.
# NOTE(review): the input is a single value and the result prints NA for
# left/rope/right - presumably the test needs more than one difference.
bct_tda_pca_5.60.5_nb.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_nb.n3_test),
  0.1, -0.01, 0.01
)
bct_tda_pca_5.60.5_nb.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_nb.n3_test))

#BayesFactor
#bf_tda_pca_5.60.5_nb.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nb.n3_test)) #bf_tda_pca_5.60.5_nb.n3_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_nb.n3_test))

## Node 4

# Naive Bayes fit on the node-4 TDA subset, resampled through fitControl.
# The usekernel = FALSE candidate fails in every fold (zero-variance dummy
# columns within a class), so only usekernel = TRUE yields resampled metrics.
Adult_TDA_PC_5.60.5_n4_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n4.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Iran, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Poland, V14.Portugal, V14.Thailand, V14.Trinadad.Tobago
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V14.Cambodia, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Iran, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland, V14.Trinadad.Tobago
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Columbia, V14.Ecuador, V14.El.Salvador, V14.Greece, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Iran, V14.Ireland, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
# Print the fitted node-4 model (resampling results per tuning candidate)
Adult_TDA_PC_5.60.5_n4_NbFit0
## Naive Bayes 
## 
## 19829 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 13219, 13220, 13219 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa
##   FALSE            NaN  NaN  
##    TRUE      0.9351455    0  
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
# Per-fold Accuracy and Kappa of the selected node-4 model
Adult_TDA_PC_5.60.5_n4_NbFit0$resample
##    Accuracy Kappa Resample
## 1 0.9350983     0    Fold1
## 2 0.9352398     0    Fold2
## 3 0.9350983     0    Fold3
# Keep the per-fold accuracies (one-column data frame) for the fold-wise
# comparison; the column is selected by name so a change in column order
# cannot silently pick a different metric.
ad_tda_pc_5.60.5_n4_nb_fit_re <- Adult_TDA_PC_5.60.5_n4_NbFit0$resample["Accuracy"]

# Structure of the final fitted model object
summary(Adult_TDA_PC_5.60.5_n4_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Score the test data with the node-4 Naive Bayes model
pred0 <- predict(
  Adult_TDA_PC_5.60.5_n4_NbFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of those predictions against the observed test labels
ad_tda_pc_5.60.5_n4_nb_cf0 <- confusionMatrix(
  data = pred0,
  as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n4_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Second print of the same node-4 confusion matrix (duplicates the one above)
ad_tda_pc_5.60.5_n4_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the node-4 confusion matrix (named numeric vector)
ad_tda_pc_5.60.5_n4_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.7592138      0.0000000      0.7506071      0.7676657      0.7592138 
## AccuracyPValue  McnemarPValue 
##      0.5055358      0.0000000
# Overall test-set accuracy, selected by name instead of by position
ad_tda_pc_5.60.5_n4_nb_cf0_ov_acc <- ad_tda_pc_5.60.5_n4_nb_cf0$overall["Accuracy"]
# Per-class statistics of the node-4 confusion matrix
ad_tda_pc_5.60.5_n4_nb_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            1.0000000            0.0000000            0.7592138 
##       Neg Pred Value            Precision               Recall 
##                  NaN            0.7592138            1.0000000 
##                   F1           Prevalence       Detection Rate 
##            0.8631285            0.7592138            0.7592138 
## Detection Prevalence    Balanced Accuracy 
##            1.0000000            0.5000000
# Precision, recall and F1, selected by name instead of positions 5:7
ad_tda_pc_5.60.5_n4_nb_cf0_pre_rec_f1 <- ad_tda_pc_5.60.5_n4_nb_cf0$byClass[c("Precision", "Recall", "F1")]


###### Conduct initial Bayesian tests of non-TDA-assisted NB vs. TDA-assisted NB classifiers

### 3-fold diff

# Fold-wise accuracy: baseline NB minus node-4 TDA-assisted NB
diff_tda_pca_5.60.5_nb_n4_3_fold <- ad_nb_fit_re - ad_tda_pc_5.60.5_n4_nb_fit_re
diff_tda_pca_5.60.5_nb_n4_3_fold
##     Accuracy
## 1 -0.1759512
## 2 -0.1616450
## 3 -0.1731873
## Bayesian tests on the 3-fold differences

# Bayesian Sign Test with bounds -0.01 / 0.01
bst_tda_pca_5.60.5_nb.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_nb_n4_3_fold),
  -0.01, 0.01
)
bst_tda_pca_5.60.5_nb.n4_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds of "left" vs. "right" from the Bayesian Sign Test.
# The ratio is NaN when both probabilities are 0 and Inf when only probRight is 0.
bst_tda_pca_5.60.5_nb.n4_3_fold_odds.left <-
  bst_tda_pca_5.60.5_nb.n4_3_fold[["probLeft"]] /
  bst_tda_pca_5.60.5_nb.n4_3_fold[["probRight"]]
bst_tda_pca_5.60.5_nb.n4_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test on the node-4 3-fold differences
bsr_tda_pca_5.60.5_nb.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nb_n4_3_fold),
  -0.01, 0.01
)
bsr_tda_pca_5.60.5_nb.n4_3_fold
## $winLeft
## [1] 0.9905
## 
## $winRope
## [1] 0.0095
## 
## $winRight
## [1] 0
# Bayesian Correlated Test on the node-4 3-fold differences
bct_tda_pca_5.60.5_nb.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_nb_n4_3_fold),
  0.1, -0.01, 0.01
)
bct_tda_pca_5.60.5_nb.n4_3_fold
## $left
## [1] 0.9995025
## 
## $rope
## [1] 0.0001041544
## 
## $right
## [1] 0.0003933754
# ROPE plot of the node-4 3-fold differences over [-0.01, 0.01]
plot(
  rope(diff_tda_pca_5.60.5_nb_n4_3_fold, c(-0.01, 0.01))
)

# Bayes factor (currently disabled)
#bf_tda_pca_5.60.5_nb.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nb_n4_3_fold))
#bf_tda_pca_5.60.5_nb.n4_3_fold

# One-sample t-test of the fold differences (default null: mean equal to 0)
t.test(as.matrix(diff_tda_pca_5.60.5_nb_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.60.5_nb_n4_3_fold)
## t = -38.86, df = 2, p-value = 0.0006615
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1891126 -0.1514097
## sample estimates:
##  mean of x 
## -0.1702612
### Test set diff
# Baseline NB test accuracy minus node-4 TDA-assisted NB test accuracy
diff_tda_pca_5.60.5_nb.n4_test <- nb_cf_ov_acc - ad_tda_pc_5.60.5_n4_nb_cf0_ov_acc
diff_tda_pca_5.60.5_nb.n4_test
##   Accuracy 
## 0.01361589
## Bayesian tests on the test-set difference

# Bayesian Sign Test with bounds -0.01 / 0.01
bst_tda_pca_5.60.5_nb.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_nb.n4_test),
  -0.01, 0.01
)
bst_tda_pca_5.60.5_nb.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" vs. "right" from the Bayesian Sign Test.
# The ratio is NaN when both probabilities are 0 and Inf when only probRight is 0.
bst_tda_pca_5.60.5_nb.n4_test_odds.left <-
  bst_tda_pca_5.60.5_nb.n4_test[["probLeft"]] /
  bst_tda_pca_5.60.5_nb.n4_test[["probRight"]]
bst_tda_pca_5.60.5_nb.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test on the node-4 test-set difference
bsr_tda_pca_5.60.5_nb.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nb.n4_test),
  -0.01, 0.01
)
bsr_tda_pca_5.60.5_nb.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.4523333
## 
## $winRight
## [1] 0.5476667
# Bayesian Correlated Test on the node-4 test-set difference.
# NOTE(review): the input is a single value and the result prints NA for
# left/rope/right - presumably the test needs more than one difference.
bct_tda_pca_5.60.5_nb.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_nb.n4_test),
  0.1, -0.01, 0.01
)
bct_tda_pca_5.60.5_nb.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_nb.n4_test))

#BayesFactor
#bf_tda_pca_5.60.5_nb.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nb.n4_test)) #bf_tda_pca_5.60.5_nb.n4_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_nb.n4_test))

## Node 5

# Naive Bayes fit on the node-5 TDA subset, resampled through fitControl.
# The usekernel = FALSE candidate fails in every fold (zero-variance dummy
# columns within a class), so only usekernel = TRUE yields resampled metrics.
Adult_TDA_PC_5.60.5_n5_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n5.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.Without.pay, V4.10th, V4.1st.4th, V4.Doctorate, V4.Preschool, V4.Prof.school, V7.Armed.Forces, V7.Farming.fishing, V7.Handlers.cleaners, V7.Transport.moving, V8.Husband, V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Italy, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.Without.pay, V4.10th, V4.1st.4th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school, V7.Armed.Forces, V7.Farming.fishing, V7.Machine.op.inspct, V7.Transport.moving, V8.Husband, V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.France, V14.Greece, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.India, V14.Iran, V14.Italy, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Yugoslavia
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.Without.pay, V4.10th, V4.1st.4th, V4.Doctorate, V4.Preschool, V4.Prof.school, V7.Armed.Forces, V7.Farming.fishing, V7.Priv.house.serv, V7.Transport.moving, V8.Husband, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.France, V14.Greece, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
# Print the fitted node-5 model (resampling results per tuning candidate)
Adult_TDA_PC_5.60.5_n5_NbFit0
## Naive Bayes 
## 
## 16508 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 11005, 11005, 11006 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy  Kappa
##   FALSE           NaN  NaN  
##    TRUE      0.992125    0  
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
# Per-fold Accuracy and Kappa of the selected node-5 model
Adult_TDA_PC_5.60.5_n5_NbFit0$resample
##    Accuracy Kappa Resample
## 1 0.9921861     0    Fold1
## 2 0.9920044     0    Fold2
## 3 0.9921847     0    Fold3
# Keep the per-fold accuracies (one-column data frame) for the fold-wise
# comparison; the column is selected by name so a change in column order
# cannot silently pick a different metric.
ad_tda_pc_5.60.5_n5_nb_fit_re <- Adult_TDA_PC_5.60.5_n5_NbFit0$resample["Accuracy"]

# Structure of the final fitted model object
summary(Adult_TDA_PC_5.60.5_n5_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Score the test data with the node-5 Naive Bayes model
pred0 <- predict(
  Adult_TDA_PC_5.60.5_n5_NbFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of those predictions against the observed test labels
ad_tda_pc_5.60.5_n5_nb_cf0 <- confusionMatrix(
  data = pred0,
  as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n5_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Second print of the same node-5 confusion matrix (duplicates the one above)
ad_tda_pc_5.60.5_n5_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the node-5 confusion matrix (named numeric vector)
ad_tda_pc_5.60.5_n5_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.7592138      0.0000000      0.7506071      0.7676657      0.7592138 
## AccuracyPValue  McnemarPValue 
##      0.5055358      0.0000000
# Overall test-set accuracy, selected by name instead of by position
ad_tda_pc_5.60.5_n5_nb_cf0_ov_acc <- ad_tda_pc_5.60.5_n5_nb_cf0$overall["Accuracy"]
# Per-class statistics of the node-5 confusion matrix
ad_tda_pc_5.60.5_n5_nb_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            1.0000000            0.0000000            0.7592138 
##       Neg Pred Value            Precision               Recall 
##                  NaN            0.7592138            1.0000000 
##                   F1           Prevalence       Detection Rate 
##            0.8631285            0.7592138            0.7592138 
## Detection Prevalence    Balanced Accuracy 
##            1.0000000            0.5000000
# Precision, recall and F1, selected by name instead of positions 5:7
ad_tda_pc_5.60.5_n5_nb_cf0_pre_rec_f1 <- ad_tda_pc_5.60.5_n5_nb_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-TDA-assisted NB vs. TDA-assisted NB classifiers

### 3-fold diff

# Fold-wise accuracy: baseline NB minus node-5 TDA-assisted NB
diff_tda_pca_5.60.5_nb_n5_3_fold <- ad_nb_fit_re - ad_tda_pc_5.60.5_n5_nb_fit_re
diff_tda_pca_5.60.5_nb_n5_3_fold
##     Accuracy
## 1 -0.2330389
## 2 -0.2184095
## 3 -0.2302736
## Bayesian tests on the 3-fold differences

# Bayesian Sign Test with bounds -0.01 / 0.01
bst_tda_pca_5.60.5_nb.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_nb_n5_3_fold),
  -0.01, 0.01
)
bst_tda_pca_5.60.5_nb.n5_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds of "left" vs. "right" from the Bayesian Sign Test.
# The ratio is NaN when both probabilities are 0 and Inf when only probRight is 0.
bst_tda_pca_5.60.5_nb.n5_3_fold_odds.left <-
  bst_tda_pca_5.60.5_nb.n5_3_fold[["probLeft"]] /
  bst_tda_pca_5.60.5_nb.n5_3_fold[["probRight"]]
bst_tda_pca_5.60.5_nb.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test on the node-5 3-fold differences.
# The input must be the node-5 difference vector; feeding the node-4
# differences here would report node-4 evidence under the node-5 name.
bsr_tda_pca_5.60.5_nb.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nb_n5_3_fold),
  -0.01, 0.01
)
bsr_tda_pca_5.60.5_nb.n5_3_fold
## $winLeft
## [1] 0.9907
## 
## $winRope
## [1] 0.0093
## 
## $winRight
## [1] 0
# Bayesian Correlated Test on the node-5 3-fold differences
bct_tda_pca_5.60.5_nb.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_nb_n5_3_fold),
  0.1, -0.01, 0.01
)
bct_tda_pca_5.60.5_nb.n5_3_fold
## $left
## [1] 0.9997158
## 
## $rope
## [1] 4.586246e-05
## 
## $right
## [1] 0.0002383219
# ROPE plot of the node-5 3-fold differences over [-0.01, 0.01]
plot(
  rope(diff_tda_pca_5.60.5_nb_n5_3_fold, c(-0.01, 0.01))
)

# Bayes factor (currently disabled)
#bf_tda_pca_5.60.5_nb.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nb_n5_3_fold))
#bf_tda_pca_5.60.5_nb.n5_3_fold

# One-sample t-test of the fold differences (default null: mean equal to 0)
t.test(as.matrix(diff_tda_pca_5.60.5_nb_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.60.5_nb_n5_3_fold)
## t = -50.642, df = 2, p-value = 0.0003897
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.2465474 -0.2079340
## sample estimates:
##  mean of x 
## -0.2272407
### Test set diff
# Baseline NB test accuracy minus node-5 TDA-assisted NB test accuracy
diff_tda_pca_5.60.5_nb.n5_test <- nb_cf_ov_acc - ad_tda_pc_5.60.5_n5_nb_cf0_ov_acc
diff_tda_pca_5.60.5_nb.n5_test
##   Accuracy 
## 0.01361589
## Bayesian tests on the test-set difference

# Bayesian Sign Test with bounds -0.01 / 0.01
bst_tda_pca_5.60.5_nb.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_nb.n5_test),
  -0.01, 0.01
)
bst_tda_pca_5.60.5_nb.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" vs. "right" from the Bayesian Sign Test.
# The ratio is NaN when both probabilities are 0 and Inf when only probRight is 0.
bst_tda_pca_5.60.5_nb.n5_test_odds.left <-
  bst_tda_pca_5.60.5_nb.n5_test[["probLeft"]] /
  bst_tda_pca_5.60.5_nb.n5_test[["probRight"]]
bst_tda_pca_5.60.5_nb.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test on the node-5 test-set difference
bsr_tda_pca_5.60.5_nb.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nb.n5_test),
  -0.01, 0.01
)
bsr_tda_pca_5.60.5_nb.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.457
## 
## $winRight
## [1] 0.543
# Bayesian Correlated Test on the node-5 test-set difference.
# NOTE(review): the input is a single value and the result prints NA for
# left/rope/right - presumably the test needs more than one difference.
bct_tda_pca_5.60.5_nb.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_nb.n5_test),
  0.1, -0.01, 0.01
)
bct_tda_pca_5.60.5_nb.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_nb.n5_test))

#BayesFactor
#bf_tda_pca_5.60.5_nb.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nb.n5_test)) #bf_tda_pca_5.60.5_nb.n5_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_nb.n5_test))

##With TDA KDE filter 5 intervals, 50% overlap, 5 bins 
##Node1

# Fit a naive Bayes classifier (caret method "nb") on the node-1 data set,
# resampled according to fitControl and selected on accuracy.
Adult_TDA_KDE_5.60.5_n1_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n1.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V14.Dominican.Republic, V14.Holand.Netherlands, V14.Ireland, V14.Outlying.US.Guam.USVI.etc., V14.Scotland
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Columbia, V14.Dominican.Republic, V14.Ecuador, V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Portugal, V14.Thailand, V14.Trinadad.Tobago
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_KDE_5.60.5_n1_NbFit0
## Naive Bayes 
## 
## 15260 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 10174, 10172, 10174 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa     
##   FALSE            NaN         NaN
##    TRUE      0.7506543  0.05894399
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
# Per-fold resampling metrics of the selected model.
print(Adult_TDA_KDE_5.60.5_n1_NbFit0$resample)
##    Accuracy      Kappa Resample
## 1 0.7445930 0.02553800    Fold1
## 2 0.7582547 0.10063763    Fold2
## 3 0.7491152 0.05065635    Fold3
# Keep only the per-fold accuracy (first column) as a one-column data frame.
ad_tda_kde_5.60.5_n1_nb_fit_re <-
  Adult_TDA_KDE_5.60.5_n1_NbFit0$resample["Accuracy"]

summary(Adult_TDA_KDE_5.60.5_n1_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Score the held-out test set with the node-1 KDE naive Bayes model.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n1_NbFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate predictions against the true test labels and print the
# resulting confusion matrix with its summary statistics.
ad_tda_kde_5.60.5_n1_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
print(ad_tda_kde_5.60.5_n1_nb_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2314
##      >50K       0    38
##                                           
##                Accuracy : 0.7631          
##                  95% CI : (0.7545, 0.7715)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.1875          
##                                           
##                   Kappa : 0.0243          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.00000         
##             Specificity : 0.01616         
##          Pos Pred Value : 0.76218         
##          Neg Pred Value : 1.00000         
##              Prevalence : 0.75921         
##          Detection Rate : 0.75921         
##    Detection Prevalence : 0.99611         
##       Balanced Accuracy : 0.50808         
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.60.5_n1_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2314
##      >50K       0    38
##                                           
##                Accuracy : 0.7631          
##                  95% CI : (0.7545, 0.7715)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.1875          
##                                           
##                   Kappa : 0.0243          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.00000         
##             Specificity : 0.01616         
##          Pos Pred Value : 0.76218         
##          Neg Pred Value : 1.00000         
##              Prevalence : 0.75921         
##          Detection Rate : 0.75921         
##    Detection Prevalence : 0.99611         
##       Balanced Accuracy : 0.50808         
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set metrics of the node-1 KDE naive Bayes confusion matrix.
ad_tda_kde_5.60.5_n1_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##     0.76310401     0.02432864     0.75454338     0.77150750     0.75921376 
## AccuracyPValue  McnemarPValue 
##     0.18754208     0.00000000
# Keep the overall accuracy (first element of $overall) for the later
# test-set comparison.
ad_tda_kde_5.60.5_n1_nb_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n1_nb_cf0$overall["Accuracy"]
# Per-class metrics. The element is named `byClass`; this line previously
# requested `byClas1`, which does not exist, so it printed NULL instead of
# the metrics. (Rendered output not regenerated here.)
ad_tda_kde_5.60.5_n1_nb_cf0$byClass
# Keep precision, recall and F1 (elements 5 to 7 of $byClass).
ad_tda_kde_5.60.5_n1_nb_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n1_nb_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers

### 3-fold diff

# Per-fold accuracy of the comparison fit (ad_nb_fit_re) minus that of the
# node-1 KDE fit; positive values mean the comparison fit scored higher.
diff_tda_kde_5.60.5_nb_n1_3_fold <-
  ad_nb_fit_re - ad_tda_kde_5.60.5_n1_nb_fit_re
print(diff_tda_kde_5.60.5_nb_n1_3_fold)
##     Accuracy
## 1 0.01455414
## 2 0.01534012
## 3 0.01279581
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

# ROPE (region of practical equivalence) is [-0.01, 0.01] for every test.
bst_tda_kde_5.60.5_nb.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nb_n1_3_fold),
  -0.01,
  0.01
)
print(bst_tda_kde_5.60.5_nb.n1_3_fold)
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test 

# Ratio of the "left" probability to the "right" probability.
bst_tda_kde_5.60.5_nb.n1_3_fold_odds.left <-
  bst_tda_kde_5.60.5_nb.n1_3_fold[["probLeft"]] /
  bst_tda_kde_5.60.5_nb.n1_3_fold[["probRight"]]
print(bst_tda_kde_5.60.5_nb.n1_3_fold_odds.left)
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.60.5_nb.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nb_n1_3_fold),
  -0.01,
  0.01
)
print(bsr_tda_kde_5.60.5_nb.n1_3_fold)
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1689667
## 
## $winRight
## [1] 0.8310333
# Bayesian Correlated Test

# NOTE(review): the second argument (0.1) is presumably the assumed
# correlation between folds -- confirm against the helper's definition.
bct_tda_kde_5.60.5_nb.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nb_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_kde_5.60.5_nb.n1_3_fold)
## $left
## [1] 0.000641164
## 
## $rope
## [1] 0.0191911
## 
## $right
## [1] 0.9801677
# Rope Plot
plot(
  rope(diff_tda_kde_5.60.5_nb_n1_3_fold, c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.60.5_nb.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nb_n1_3_fold))
#bf_tda_kde_5.60.5_nb.n1_3_fold

# Frequentist one-sample t-test on the same fold differences (mu = 0).
t.test(as.matrix(diff_tda_kde_5.60.5_nb_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.60.5_nb_n1_3_fold)
## t = 18.919, df = 2, p-value = 0.002782
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.01099380 0.01746625
## sample estimates:
##  mean of x 
## 0.01423003
### Test set diff
# Test-set accuracy of the comparison model (nb_cf_ov_acc) minus that of the
# node-1 KDE model; a positive value means the comparison model scored higher.
diff_tda_kde_5.60.5_nb.n1_test <-
  nb_cf_ov_acc - ad_tda_kde_5.60.5_n1_nb_cf0_ov_acc
print(diff_tda_kde_5.60.5_nb.n1_test)
##    Accuracy 
## 0.009725635
## Bayesian Tests Test set diff
# NOTE(review): every test in this section receives a single difference, so
# the reported probabilities rest on one observation only.

# Bayesian Sign Test

bst_tda_kde_5.60.5_nb.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nb.n1_test),
  -0.01,
  0.01
)
print(bst_tda_kde_5.60.5_nb.n1_test)
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

# Both probabilities are 0 here, so the ratio is 0 / 0 and evaluates to NaN.
bst_tda_kde_5.60.5_nb.n1_test_odds.left <-
  bst_tda_kde_5.60.5_nb.n1_test[["probLeft"]] /
  bst_tda_kde_5.60.5_nb.n1_test[["probRight"]]
print(bst_tda_kde_5.60.5_nb.n1_test_odds.left)
## [1] NaN
# Bayesian Signed Rank Test

bsr_tda_kde_5.60.5_nb.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nb.n1_test),
  -0.01,
  0.01
)
print(bsr_tda_kde_5.60.5_nb.n1_test)
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

# Returns NA for this one-value input (see output below); presumably no
# standard deviation can be estimated from a single value -- confirm.
bct_tda_kde_5.60.5_nb.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nb.n1_test),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_kde_5.60.5_nb.n1_test)
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_nb.n1_test)))

#BayesFactor
#bf_tda_kde_5.60.5_nb.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nb.n1_test)) #bf_tda_pca_5.60.5_nb.n1_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_nb.n1_test))

##Node2

# Fit a naive Bayes classifier (caret method "nb") for node 2, resampled
# according to fitControl and selected on accuracy.
# NOTE(review): the result is stored under a *_KDE_* name, yet the data
# argument is tda.m_adult_5.60.5.n2.vec, whereas the node-1 fit uses
# tda.m_kde_adult_5.60.5.n1.vec. Confirm whether a KDE-specific node-2 data
# set was intended here.
Adult_TDA_KDE_5.60.5_n2_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n2.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Ireland, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago, V14.Vietnam
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Priv.house.serv, V14.Columbia, V14.Dominican.Republic, V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_KDE_5.60.5_n2_NbFit0
## Naive Bayes 
## 
## 13933 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 9288, 9289, 9289 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa     
##   FALSE            NaN         NaN
##    TRUE      0.5231467  0.06233996
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
# Per-fold resampling metrics of the selected model.
print(Adult_TDA_KDE_5.60.5_n2_NbFit0$resample)
##    Accuracy      Kappa Resample
## 1 0.5207750 0.05776039    Fold1
## 2 0.5269165 0.06981230    Fold2
## 3 0.5217485 0.05944720    Fold3
# Keep only the per-fold accuracy (first column) as a one-column data frame.
ad_tda_kde_5.60.5_n2_nb_fit_re <-
  Adult_TDA_KDE_5.60.5_n2_NbFit0$resample["Accuracy"]

summary(Adult_TDA_KDE_5.60.5_n2_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Score the held-out test set with the node-2 KDE naive Bayes model.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n2_NbFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate predictions against the true test labels and print the
# resulting confusion matrix with its summary statistics.
ad_tda_kde_5.60.5_n2_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
print(ad_tda_kde_5.60.5_n2_nb_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7409  2304
##      >50K       7    48
##                                           
##                Accuracy : 0.7634          
##                  95% CI : (0.7549, 0.7718)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.169           
##                                           
##                   Kappa : 0.0292          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.99906         
##             Specificity : 0.02041         
##          Pos Pred Value : 0.76279         
##          Neg Pred Value : 0.87273         
##              Prevalence : 0.75921         
##          Detection Rate : 0.75850         
##    Detection Prevalence : 0.99437         
##       Balanced Accuracy : 0.50973         
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.60.5_n2_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7409  2304
##      >50K       7    48
##                                           
##                Accuracy : 0.7634          
##                  95% CI : (0.7549, 0.7718)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.169           
##                                           
##                   Kappa : 0.0292          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.99906         
##             Specificity : 0.02041         
##          Pos Pred Value : 0.76279         
##          Neg Pred Value : 0.87273         
##              Prevalence : 0.75921         
##          Detection Rate : 0.75850         
##    Detection Prevalence : 0.99437         
##       Balanced Accuracy : 0.50973         
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set metrics of the node-2 confusion matrix.
print(ad_tda_kde_5.60.5_n2_nb_cf0$overall)
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##     0.76341114     0.02920106     0.75485419     0.77181076     0.75921376 
## AccuracyPValue  McnemarPValue 
##     0.16896721     0.00000000
# Keep the overall accuracy (first element of $overall).
ad_tda_kde_5.60.5_n2_nb_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n2_nb_cf0$overall["Accuracy"]
# Per-class metrics.
print(ad_tda_kde_5.60.5_n2_nb_cf0$byClass)
##          Sensitivity          Specificity       Pos Pred Value 
##           0.99905609           0.02040816           0.76279213 
##       Neg Pred Value            Precision               Recall 
##           0.87272727           0.76279213           0.99905609 
##                   F1           Prevalence       Detection Rate 
##           0.86508261           0.75921376           0.75849713 
## Detection Prevalence    Balanced Accuracy 
##           0.99436937           0.50973213
# Keep precision, recall and F1 (elements 5 to 7 of $byClass).
ad_tda_kde_5.60.5_n2_nb_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n2_nb_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers

### 3-fold diff

# Per-fold accuracy of the comparison fit (ad_nb_fit_re) minus that of the
# node-2 KDE fit; positive values mean the comparison fit scored higher.
diff_tda_kde_5.60.5_nb_n2_3_fold <-
  ad_nb_fit_re - ad_tda_kde_5.60.5_n2_nb_fit_re
print(diff_tda_kde_5.60.5_nb_n2_3_fold)
##    Accuracy
## 1 0.2383721
## 2 0.2466784
## 3 0.2401625
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

# ROPE (region of practical equivalence) is [-0.01, 0.01] for every test.
bst_tda_kde_5.60.5_nb.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nb_n2_3_fold),
  -0.01,
  0.01
)
print(bst_tda_kde_5.60.5_nb.n2_3_fold)
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test 

# Ratio of the "left" probability to the "right" probability.
bst_tda_kde_5.60.5_nb.n2_3_fold_odds.left <-
  bst_tda_kde_5.60.5_nb.n2_3_fold[["probLeft"]] /
  bst_tda_kde_5.60.5_nb.n2_3_fold[["probRight"]]
print(bst_tda_kde_5.60.5_nb.n2_3_fold_odds.left)
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.60.5_nb.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nb_n2_3_fold),
  -0.01,
  0.01
)
print(bsr_tda_kde_5.60.5_nb.n2_3_fold)
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.0102
## 
## $winRight
## [1] 0.9898
# Bayesian Correlated Test

# NOTE(review): the second argument (0.1) is presumably the assumed
# correlation between folds -- confirm against the helper's definition.
bct_tda_kde_5.60.5_nb.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nb_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_kde_5.60.5_nb.n2_3_fold)
## $left
## [1] 6.699611e-05
## 
## $rope
## [1] 1.206029e-05
## 
## $right
## [1] 0.9999209
# Rope Plot
plot(
  rope(diff_tda_kde_5.60.5_nb_n2_3_fold, c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.60.5_nb.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nb_n2_3_fold))
#bf_tda_kde_5.60.5_nb.n2_3_fold

# Frequentist one-sample t-test on the same fold differences (mu = 0).
t.test(as.matrix(diff_tda_kde_5.60.5_nb_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.60.5_nb_n2_3_fold)
## t = 95.782, df = 2, p-value = 0.000109
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.2308785 0.2525969
## sample estimates:
## mean of x 
## 0.2417377
### Test set diff
# Test-set accuracy of the comparison model (nb_cf_ov_acc) minus that of the
# node-2 KDE model; a positive value means the comparison model scored higher.
diff_tda_kde_5.60.5_nb.n2_test <-
  nb_cf_ov_acc - ad_tda_kde_5.60.5_n2_nb_cf0_ov_acc
print(diff_tda_kde_5.60.5_nb.n2_test)
##    Accuracy 
## 0.009418509
## Bayesian Tests Test set diff
# NOTE(review): every test in this section receives a single difference, so
# the reported probabilities rest on one observation only.

# Bayesian Sign Test

bst_tda_kde_5.60.5_nb.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nb.n2_test),
  -0.01,
  0.01
)
print(bst_tda_kde_5.60.5_nb.n2_test)
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

# Both probabilities are 0 here, so the ratio is 0 / 0 and evaluates to NaN.
bst_tda_kde_5.60.5_nb.n2_test_odds.left <-
  bst_tda_kde_5.60.5_nb.n2_test[["probLeft"]] /
  bst_tda_kde_5.60.5_nb.n2_test[["probRight"]]
print(bst_tda_kde_5.60.5_nb.n2_test_odds.left)
## [1] NaN
# Bayesian Signed Rank Test

bsr_tda_kde_5.60.5_nb.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nb.n2_test),
  -0.01,
  0.01
)
print(bsr_tda_kde_5.60.5_nb.n2_test)
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

# Returns NA for this one-value input (see output below); presumably no
# standard deviation can be estimated from a single value -- confirm.
bct_tda_kde_5.60.5_nb.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nb.n2_test),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_kde_5.60.5_nb.n2_test)
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_nb.n2_test)))

#BayesFactor
#bf_tda_kde_5.60.5_nb.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nb.n2_test)) #bf_tda_kde_5.60.5_nb.n2_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_nb.n2_test))

##Node3

# Fit a naive Bayes classifier (caret method "nb") for node 3, resampled
# according to fitControl and selected on accuracy.
# NOTE(review): the result is stored under a *_KDE_* name, yet the data
# argument is tda.m_adult_5.60.5.n3.vec, whereas the node-1 fit uses
# tda.m_kde_adult_5.60.5.n1.vec. Confirm whether a KDE-specific node-3 data
# set was intended here.
Adult_TDA_KDE_5.60.5_n3_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n3.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Portugal, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Columbia, V14.El.Salvador, V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Peru
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Ecuador, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Vietnam
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_KDE_5.60.5_n3_NbFit0
## Naive Bayes 
## 
## 15744 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 10497, 10495, 10496 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa      
##   FALSE            NaN          NaN
##    TRUE      0.7444106  0.001476707
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
# Per-fold resampling metrics of the selected model.
print(Adult_TDA_KDE_5.60.5_n3_NbFit0$resample)
##    Accuracy       Kappa Resample
## 1 0.7449971 0.004430121    Fold1
## 2 0.7441417 0.000000000    Fold2
## 3 0.7440930 0.000000000    Fold3
# Keep only the per-fold accuracy (first column) as a one-column data frame.
ad_tda_kde_5.60.5_n3_nb_fit_re <-
  Adult_TDA_KDE_5.60.5_n3_NbFit0$resample["Accuracy"]

summary(Adult_TDA_KDE_5.60.5_n3_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Score the held-out test set with the node-3 KDE naive Bayes model.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n3_NbFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate predictions against the true test labels and print the
# resulting confusion matrix with its summary statistics.
ad_tda_kde_5.60.5_n3_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
print(ad_tda_kde_5.60.5_n3_nb_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2205
##      >50K       0   147
##                                           
##                Accuracy : 0.7743          
##                  95% CI : (0.7658, 0.7825)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.0002414       
##                                           
##                   Kappa : 0.0919          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0625          
##          Pos Pred Value : 0.7708          
##          Neg Pred Value : 1.0000          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 0.9850          
##       Balanced Accuracy : 0.5312          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.60.5_n3_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2205
##      >50K       0   147
##                                           
##                Accuracy : 0.7743          
##                  95% CI : (0.7658, 0.7825)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.0002414       
##                                           
##                   Kappa : 0.0919          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0625          
##          Pos Pred Value : 0.7708          
##          Neg Pred Value : 1.0000          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 0.9850          
##       Balanced Accuracy : 0.5312          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set metrics of the node-3 confusion matrix.
print(ad_tda_kde_5.60.5_n3_nb_cf0$overall)
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   0.7742628993   0.0919232485   0.7658399046   0.7825220598   0.7592137592 
## AccuracyPValue  McnemarPValue 
##   0.0002413576   0.0000000000
# Keep the overall accuracy (first element of $overall).
ad_tda_kde_5.60.5_n3_nb_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n3_nb_cf0$overall["Accuracy"]
# Per-class metrics.
print(ad_tda_kde_5.60.5_n3_nb_cf0$byClass)
##          Sensitivity          Specificity       Pos Pred Value 
##            1.0000000            0.0625000            0.7708138 
##       Neg Pred Value            Precision               Recall 
##            1.0000000            0.7708138            1.0000000 
##                   F1           Prevalence       Detection Rate 
##            0.8705758            0.7592138            0.7592138 
## Detection Prevalence    Balanced Accuracy 
##            0.9849509            0.5312500
# Keep precision, recall and F1 (elements 5 to 7 of $byClass).
ad_tda_kde_5.60.5_n3_nb_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n3_nb_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers

### 3-fold diff

# Per-fold accuracy of the comparison fit (ad_nb_fit_re) minus that of the
# node-3 KDE fit; positive values mean the comparison fit scored higher.
diff_tda_kde_5.60.5_nb_n3_3_fold <-
  ad_nb_fit_re - ad_tda_kde_5.60.5_n3_nb_fit_re
print(diff_tda_kde_5.60.5_nb_n3_3_fold)
##     Accuracy
## 1 0.01415000
## 2 0.02945310
## 3 0.01781804
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

# ROPE (region of practical equivalence) is [-0.01, 0.01] for every test.
bst_tda_kde_5.60.5_nb.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nb_n3_3_fold),
  -0.01,
  0.01
)
print(bst_tda_kde_5.60.5_nb.n3_3_fold)
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test 

# Ratio of the "left" probability to the "right" probability.
bst_tda_kde_5.60.5_nb.n3_3_fold_odds.left <-
  bst_tda_kde_5.60.5_nb.n3_3_fold[["probLeft"]] /
  bst_tda_kde_5.60.5_nb.n3_3_fold[["probRight"]]
print(bst_tda_kde_5.60.5_nb.n3_3_fold_odds.left)
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.60.5_nb.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nb_n3_3_fold),
  -0.01,
  0.01
)
print(bsr_tda_kde_5.60.5_nb.n3_3_fold)
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.0875
## 
## $winRight
## [1] 0.9125
# Bayesian Correlated Test

# NOTE(review): the second argument (0.1) is presumably the assumed
# correlation between folds -- confirm against the helper's definition.
bct_tda_kde_5.60.5_nb.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nb_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_kde_5.60.5_nb.n3_3_fold)
## $left
## [1] 0.01460946
## 
## $rope
## [1] 0.07947101
## 
## $right
## [1] 0.9059195
# Rope Plot
plot(
  rope(diff_tda_kde_5.60.5_nb_n3_3_fold, c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.60.5_nb.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nb_n3_3_fold))
#bf_tda_kde_5.60.5_nb.n3_3_fold

# Frequentist one-sample t-test on the same fold differences (mu = 0).
t.test(as.matrix(diff_tda_kde_5.60.5_nb_n3_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.60.5_nb_n3_3_fold)
## t = 4.4384, df = 2, p-value = 0.0472
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.000626148 0.040321281
## sample estimates:
##  mean of x 
## 0.02047371
### Test set diff
# Test-set accuracy of the comparison model (nb_cf_ov_acc) minus that of the
# node-3 KDE model; the negative value below means the node-3 model scored
# slightly higher on the test set.
diff_tda_kde_5.60.5_nb.n3_test <-
  nb_cf_ov_acc - ad_tda_kde_5.60.5_n3_nb_cf0_ov_acc
print(diff_tda_kde_5.60.5_nb.n3_test)
##     Accuracy 
## -0.001433251
## Bayesian Tests Test set diff
# NOTE(review): every test in this section receives a single difference, so
# the reported probabilities rest on one observation only.

# Bayesian Sign Test

bst_tda_kde_5.60.5_nb.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nb.n3_test),
  -0.01,
  0.01
)
print(bst_tda_kde_5.60.5_nb.n3_test)
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

# Both probabilities are 0 here, so the ratio is 0 / 0 and evaluates to NaN.
bst_tda_kde_5.60.5_nb.n3_test_odds.left <-
  bst_tda_kde_5.60.5_nb.n3_test[["probLeft"]] /
  bst_tda_kde_5.60.5_nb.n3_test[["probRight"]]
print(bst_tda_kde_5.60.5_nb.n3_test_odds.left)
## [1] NaN
# Bayesian Signed Rank Test

bsr_tda_kde_5.60.5_nb.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nb.n3_test),
  -0.01,
  0.01
)
print(bsr_tda_kde_5.60.5_nb.n3_test)
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

# Returns NA for this one-value input (see output below); presumably no
# standard deviation can be estimated from a single value -- confirm.
bct_tda_kde_5.60.5_nb.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nb.n3_test),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_kde_5.60.5_nb.n3_test)
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_nb.n3_test)))

#BayesFactor
#bf_tda_kde_5.60.5_nb.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nb.n3_test)) #bf_tda_kde_5.60.5_nb.n3_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_nb.n3_test))


##Node4

# Fit a naive Bayes ("nb") classifier via train() on the Node4 data set, with
# `adult_df1` (as a factor) as the outcome and every other column as a
# predictor. Resampling is controlled by `fitControl`; the model is selected on
# Accuracy. In the recorded run the usekernel = FALSE candidate failed in every
# fold (zero-variance predictors), so only usekernel = TRUE was scored.
Adult_TDA_KDE_5.60.5_n4_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n4.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Dominican.Republic, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Iran, V14.Jamaica, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Thailand, V14.Trinadad.Tobago, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Cambodia, V14.Columbia, V14.Ecuador, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Iran, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Portugal, V14.Trinadad.Tobago
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras, V14.Iran, V14.Ireland, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland, V14.Trinadad.Tobago
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
# Print the fitted Node4 model: resampling results per tuning value and the
# final parameters chosen.
Adult_TDA_KDE_5.60.5_n4_NbFit0
## Naive Bayes 
## 
## 19829 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 13219, 13220, 13219 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa
##   FALSE            NaN  NaN  
##    TRUE      0.9351455    0  
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
# Per-fold resampling metrics of the selected Node4 model
Adult_TDA_KDE_5.60.5_n4_NbFit0$resample
##    Accuracy Kappa Resample
## 1 0.9350983     0    Fold1
## 2 0.9352398     0    Fold2
## 3 0.9350983     0    Fold3
# Keep only the per-fold Accuracy column (still a one-column data frame); it is
# used for the 3-fold difference further down.
ad_tda_kde_5.60.5_n4_nb_fit_re <-
  Adult_TDA_KDE_5.60.5_n4_NbFit0$resample["Accuracy"]

# Structure of the underlying final model object
summary(Adult_TDA_KDE_5.60.5_n4_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Predict test-set classes with the Node4 model fitted on the training data
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n4_NbFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of predictions against the observed test labels, used to
# assess model performance on the test data
ad_tda_kde_5.60.5_n4_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n4_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Print the Node4 confusion matrix a second time (same object as printed
# directly above, so the output repeats).
ad_tda_kde_5.60.5_n4_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the Node4 test-set confusion matrix
ad_tda_kde_5.60.5_n4_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.7592138      0.0000000      0.7506071      0.7676657      0.7592138 
## AccuracyPValue  McnemarPValue 
##      0.5055358      0.0000000
# Keep the overall test accuracy (first entry, named "Accuracy")
ad_tda_kde_5.60.5_n4_nb_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n4_nb_cf0$overall["Accuracy"]
# Per-class statistics
ad_tda_kde_5.60.5_n4_nb_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            1.0000000            0.0000000            0.7592138 
##       Neg Pred Value            Precision               Recall 
##                  NaN            0.7592138            1.0000000 
##                   F1           Prevalence       Detection Rate 
##            0.8631285            0.7592138            0.7592138 
## Detection Prevalence    Balanced Accuracy 
##            1.0000000            0.5000000
# Keep precision, recall and F1 (entries 5 to 7 of byClass)
ad_tda_kde_5.60.5_n4_nb_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n4_nb_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-TDA-assisted NB vs. TDA-assisted NB classifiers

### 3-fold diff

# Per-fold accuracy difference: `ad_nb_fit_re` (defined earlier in the file)
# minus the Node4 model's per-fold accuracies, subtracted row by row.
diff_tda_kde_5.60.5_nb_n4_3_fold <- ad_nb_fit_re - ad_tda_kde_5.60.5_n4_nb_fit_re
diff_tda_kde_5.60.5_nb_n4_3_fold
##     Accuracy
## 1 -0.1759512
## 2 -0.1616450
## 3 -0.1731873
## Bayesian Tests 3-fold diff

# Bayesian sign test on the fold differences (limits -0.01, 0.01)

bst_tda_kde_5.60.5_nb.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nb_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_nb.n4_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test.
# probRight is 0 in the recorded run, so the ratio is Inf.

bst_tda_kde_5.60.5_nb.n4_3_fold_odds.left <-
  bst_tda_kde_5.60.5_nb.n4_3_fold[["probLeft"]] /
  bst_tda_kde_5.60.5_nb.n4_3_fold[["probRight"]]
bst_tda_kde_5.60.5_nb.n4_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test

bsr_tda_kde_5.60.5_nb.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nb_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_nb.n4_3_fold
## $winLeft
## [1] 0.9909
## 
## $winRope
## [1] 0.0091
## 
## $winRight
## [1] 0
# Correlated Bayesian t-test.
# NOTE(review): the second argument (0.1) looks like the between-fold
# correlation; the usual 1/k heuristic would give 1/3 for 3-fold CV -- confirm.

bct_tda_kde_5.60.5_nb.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nb_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_nb.n4_3_fold
## $left
## [1] 0.9995025
## 
## $rope
## [1] 0.0001041544
## 
## $right
## [1] 0.0003933754
# ROPE plot of the fold differences over the range [-0.01, 0.01]
plot(rope(diff_tda_kde_5.60.5_nb_n4_3_fold, range = c(-0.01, 0.01)))

# Bayes factor (disabled)
#bf_tda_kde_5.60.5_nb.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nb_n4_3_fold))
#bf_tda_kde_5.60.5_nb.n4_3_fold

# One-sample t-test of the fold differences against a mean of zero
t.test(as.matrix(diff_tda_kde_5.60.5_nb_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.60.5_nb_n4_3_fold)
## t = -38.86, df = 2, p-value = 0.0006615
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1891126 -0.1514097
## sample estimates:
##  mean of x 
## -0.1702612
### Test set diff
# Single test-set accuracy difference: `nb_cf_ov_acc` (defined earlier in the
# file) minus the Node4 model's overall test accuracy.
diff_tda_kde_5.60.5_nb.n4_test <- nb_cf_ov_acc - ad_tda_kde_5.60.5_n4_nb_cf0_ov_acc
diff_tda_kde_5.60.5_nb.n4_test
##   Accuracy 
## 0.01361589
## Bayesian Tests Test set diff

# Bayesian sign test on the single test-set difference (limits -0.01, 0.01)

bst_tda_kde_5.60.5_nb.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nb.n4_test),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_nb.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the Bayesian sign test
# (0 / 0.5 = 0 in the recorded run).

bst_tda_kde_5.60.5_nb.n4_test_odds.left <-
  bst_tda_kde_5.60.5_nb.n4_test[["probLeft"]] /
  bst_tda_kde_5.60.5_nb.n4_test[["probRight"]]
bst_tda_kde_5.60.5_nb.n4_test_odds.left
## [1] 0
# Bayesian signed-rank test

bsr_tda_kde_5.60.5_nb.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nb.n4_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_nb.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.4578667
## 
## $winRight
## [1] 0.5421333
# Correlated Bayesian t-test.
# Returns NA for all three entries in the recorded run -- presumably because a
# single difference has no sample variance; confirm against the helper.

bct_tda_kde_5.60.5_nb.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nb.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_nb.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# ROPE plot (disabled)
#plot(rope(diff_tda_kde_5.60.5_nb.n4_test, c(-0.01, 0.01)))

# Bayes factor (disabled)
#bf_tda_kde_5.60.5_nb.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nb.n4_test))
#bf_tda_kde_5.60.5_nb.n4_test

# t-test (disabled)
#t.test(as.matrix(diff_tda_kde_5.60.5_nb.n4_test))

##Node5

# Fit a naive Bayes ("nb") classifier via train() on the Node5 data set, with
# `adult_df1` (as a factor) as the outcome and every other column as a
# predictor. Resampling is controlled by `fitControl`; the model is selected on
# Accuracy. In the recorded run the usekernel = FALSE candidate failed in every
# fold (zero-variance predictors), so only usekernel = TRUE was scored.
Adult_TDA_KDE_5.60.5_n5_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n5.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.Without.pay, V4.10th, V4.1st.4th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V7.Armed.Forces, V7.Farming.fishing, V7.Transport.moving, V8.Husband, V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.France, V14.Germany, V14.Greece, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Italy, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.Without.pay, V4.10th, V4.1st.4th, V4.5th.6th, V4.Doctorate, V4.Preschool, V4.Prof.school, V7.Armed.Forces, V7.Farming.fishing, V7.Handlers.cleaners, V7.Transport.moving, V8.Husband, V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Yugoslavia
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.Without.pay, V4.10th, V4.1st.4th, V4.Doctorate, V4.Preschool, V4.Prof.school, V7.Armed.Forces, V7.Farming.fishing, V7.Priv.house.serv, V7.Transport.moving, V8.Husband, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.France, V14.Greece, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.India, V14.Iran, V14.Italy, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
# Print the fitted Node5 model: resampling results per tuning value and the
# final parameters chosen.
Adult_TDA_KDE_5.60.5_n5_NbFit0
## Naive Bayes 
## 
## 16508 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 11005, 11006, 11005 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy  Kappa
##   FALSE           NaN  NaN  
##    TRUE      0.992125    0  
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
# Per-fold resampling metrics of the selected Node5 model
Adult_TDA_KDE_5.60.5_n5_NbFit0$resample
##    Accuracy Kappa Resample
## 1 0.9921861     0    Fold1
## 2 0.9921847     0    Fold2
## 3 0.9920044     0    Fold3
# Keep only the per-fold Accuracy column (still a one-column data frame); it is
# used for the 3-fold difference further down.
ad_tda_kde_5.60.5_n5_nb_fit_re <-
  Adult_TDA_KDE_5.60.5_n5_NbFit0$resample["Accuracy"]

# Structure of the underlying final model object
summary(Adult_TDA_KDE_5.60.5_n5_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Predict test-set classes with the Node5 model fitted on the training data
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n5_NbFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of predictions against the observed test labels, used to
# assess model performance on the test data
ad_tda_kde_5.60.5_n5_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n5_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Print the Node5 confusion matrix a second time (same object as printed
# directly above, so the output repeats).
ad_tda_kde_5.60.5_n5_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the Node5 test-set confusion matrix
ad_tda_kde_5.60.5_n5_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.7592138      0.0000000      0.7506071      0.7676657      0.7592138 
## AccuracyPValue  McnemarPValue 
##      0.5055358      0.0000000
# Keep the overall test accuracy (first entry, named "Accuracy")
ad_tda_kde_5.60.5_n5_nb_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n5_nb_cf0$overall["Accuracy"]
# Per-class statistics
ad_tda_kde_5.60.5_n5_nb_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            1.0000000            0.0000000            0.7592138 
##       Neg Pred Value            Precision               Recall 
##                  NaN            0.7592138            1.0000000 
##                   F1           Prevalence       Detection Rate 
##            0.8631285            0.7592138            0.7592138 
## Detection Prevalence    Balanced Accuracy 
##            1.0000000            0.5000000
# Keep precision, recall and F1 (entries 5 to 7 of byClass)
ad_tda_kde_5.60.5_n5_nb_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n5_nb_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-TDA-assisted NB vs. TDA-assisted NB classifiers

### 3-fold diff

# Per-fold accuracy difference: `ad_nb_fit_re` (defined earlier in the file)
# minus the Node5 model's per-fold accuracies, subtracted row by row.
diff_tda_kde_5.60.5_nb_n5_3_fold <- ad_nb_fit_re - ad_tda_kde_5.60.5_n5_nb_fit_re
diff_tda_kde_5.60.5_nb_n5_3_fold
##     Accuracy
## 1 -0.2330389
## 2 -0.2185898
## 3 -0.2300933
## Bayesian Tests 3-fold diff

# Bayesian sign test on the fold differences (limits -0.01, 0.01)

bst_tda_kde_5.60.5_nb.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nb_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_nb.n5_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test.
# probRight is 0 in the recorded run, so the ratio is Inf.

bst_tda_kde_5.60.5_nb.n5_3_fold_odds.left <-
  bst_tda_kde_5.60.5_nb.n5_3_fold[["probLeft"]] /
  bst_tda_kde_5.60.5_nb.n5_3_fold[["probRight"]]
bst_tda_kde_5.60.5_nb.n5_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test

bsr_tda_kde_5.60.5_nb.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nb_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_nb.n5_3_fold
## $winLeft
## [1] 0.9912
## 
## $winRope
## [1] 0.0088
## 
## $winRight
## [1] 0
# Correlated Bayesian t-test.
# NOTE(review): the second argument (0.1) looks like the between-fold
# correlation; the usual 1/k heuristic would give 1/3 for 3-fold CV -- confirm.

bct_tda_kde_5.60.5_nb.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nb_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_nb.n5_3_fold
## $left
## [1] 0.9997257
## 
## $rope
## [1] 4.426543e-05
## 
## $right
## [1] 0.0002300162
# ROPE plot of the fold differences over the range [-0.01, 0.01]
plot(rope(diff_tda_kde_5.60.5_nb_n5_3_fold, range = c(-0.01, 0.01)))

# Bayes factor (disabled)
#bf_tda_kde_5.60.5_nb.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nb_n5_3_fold))
#bf_tda_kde_5.60.5_nb.n5_3_fold

# One-sample t-test of the fold differences against a mean of zero
t.test(as.matrix(diff_tda_kde_5.60.5_nb_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.60.5_nb_n5_3_fold)
## t = -51.549, df = 2, p-value = 0.0003761
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.2462078 -0.2082736
## sample estimates:
##  mean of x 
## -0.2272407
### Test set diff
# Single test-set accuracy difference: `nb_cf_ov_acc` (defined earlier in the
# file) minus the Node5 model's overall test accuracy.
diff_tda_kde_5.60.5_nb.n5_test <- nb_cf_ov_acc - ad_tda_kde_5.60.5_n5_nb_cf0_ov_acc
diff_tda_kde_5.60.5_nb.n5_test
##   Accuracy 
## 0.01361589
## Bayesian Tests Test set diff

# Bayesian sign test on the single test-set difference (limits -0.01, 0.01)

bst_tda_kde_5.60.5_nb.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nb.n5_test),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_nb.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the Bayesian sign test
# (0 / 0.5 = 0 in the recorded run).

bst_tda_kde_5.60.5_nb.n5_test_odds.left <-
  bst_tda_kde_5.60.5_nb.n5_test[["probLeft"]] /
  bst_tda_kde_5.60.5_nb.n5_test[["probRight"]]
bst_tda_kde_5.60.5_nb.n5_test_odds.left
## [1] 0
# Bayesian signed-rank test

bsr_tda_kde_5.60.5_nb.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nb.n5_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_nb.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.4567333
## 
## $winRight
## [1] 0.5432667
# Correlated Bayesian t-test.
# Returns NA for all three entries in the recorded run -- presumably because a
# single difference has no sample variance; confirm against the helper.

bct_tda_kde_5.60.5_nb.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nb.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_nb.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# ROPE plot (disabled)
#plot(rope(diff_tda_kde_5.60.5_nb.n5_test, c(-0.01, 0.01)))

# Bayes factor (disabled)
#bf_tda_kde_5.60.5_nb.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nb.n5_test))
#bf_tda_kde_5.60.5_nb.n5_test

# t-test (disabled)
#t.test(as.matrix(diff_tda_kde_5.60.5_nb.n5_test))